# import relevant modules
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import statsmodels.formula.api as sn
import scipy.stats as stats
from matplotlib.backends.backend_pdf import PdfPages
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
from patsy import dmatrices
%matplotlib inline
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
# Settings
import sys  # imported here so this cell is self-contained; needed for sys.maxsize

# Show every column when a DataFrame is displayed (no "..." column elision).
pd.set_option('display.max_columns', None)
# Print NumPy arrays in full with 3 decimal places.
# threshold=np.nan is rejected by NumPy with a ValueError; sys.maxsize is the
# documented way to switch off summarisation.
np.set_printoptions(threshold=sys.maxsize, precision=3)
# Plot appearance: apply the seaborn "darkgrid" style first, then set the
# axis-label and tick-label font sizes in a single rcParams update.
sns.set(style="darkgrid")
plt.rcParams.update({
    'axes.labelsize': 14,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})
# Load the "customer_dbase" worksheet of the workbook into a DataFrame.
# The keyword is `sheet_name`; the older `sheetname` spelling was deprecated
# and later removed from pandas.read_excel.
custdata_df = pd.read_excel("Data Set.xlsx", sheet_name="customer_dbase")
# Preview five randomly sampled rows.
custdata_df.sample(5)
custid | region | townsize | gender | age | agecat | birthmonth | ed | edcat | jobcat | union | employ | empcat | retire | income | lninc | inccat | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | default | jobsat | marital | spoused | spousedcat | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | homeown | hometype | address | addresscat | cars | carown | cartype | carvalue | carcatvalue | carbought | carbuy | commute | commutecat | commutetime | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardbenefit | cardfee | cardtenure | cardtenurecat | card2 | card2type | card2benefit | card2fee | card2tenure | card2tenurecat | cardspent | card2spent | active | bfast | tenure | churn | longmon | lnlongmon | longten | lnlongten | tollfree | tollmon | lntollmon | tollten | lntollten | equip | equipmon | lnequipmon | equipten | lnequipten | callcard | cardmon | lncardmon | cardten | lncardten | wireless | wiremon | lnwiremon | wireten | lnwireten | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | hourstv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
394 | 8512-KZJXAA-A34 | 5 | 2.0 | 0 | 58 | 5 | July | 13 | 2 | 3 | 0 | 13 | 4 | 0 | 54 | 3.988984 | 3 | 16.4 | 1.186704 | 0.171180 | 7.669296 | 2.037225 | 0 | 4 | 0 | -1 | -1 | 1 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 7 | 1 | 1 | 25 | 4 | 3 | 1 | 0 | 28.0 | 2 | 0 | 1 | 1 | 1 | 23.0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 5 | 0 | 0 | 1 | 3 | 3 | 2 | 0 | 18 | 5 | 2 | 2 | 1 | 0 | 14 | 4 | 226.22 | 62.82 | 1 | 3 | 43 | 0 | 9.10 | 2.208274 | 399.10 | 5.989212 | 1 | 37.25 | 3.617652 | 1616.55 | 7.388050 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 32.50 | 3.481240 | 1395.0 | 7.240650 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 18 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 |
218 | 8477-FURXBL-V98 | 1 | 1.0 | 0 | 47 | 4 | March | 15 | 3 | 3 | 1 | 8 | 3 | 0 | 45 | 3.806662 | 2 | 19.7 | 2.526525 | 0.926845 | 6.338475 | 1.846638 | 0 | 5 | 0 | -1 | -1 | 1 | 2 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 24 | 4 | 1 | 1 | 1 | 25.3 | 2 | 0 | 1 | 3 | 2 | 14.0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 4 | 1 | 4 | 0 | 4 | 2 | 3 | 2 | 3 | 0 | 6 | 3 | 42.69 | 12.08 | 0 | 3 | 7 | 0 | 4.05 | 1.398717 | 27.50 | 3.314186 | 0 | 0.00 | NaN | 0.00 | NaN | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 1 | 17 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
4015 | 0409-MMPGJY-ECA | 1 | 1.0 | 1 | 60 | 5 | January | 12 | 2 | 5 | 1 | 27 | 5 | 0 | 79 | 4.369448 | 4 | 7.7 | 1.344343 | 0.295905 | 4.738657 | 1.555754 | 0 | 2 | 0 | -1 | -1 | 1 | 2 | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 9 | 3 | 0 | -1 | -1 | -1.0 | -1 | -1 | 1 | 2 | 1 | 15.0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 6 | 1 | 0 | 1 | 1 | 1 | 2 | 0 | 19 | 5 | 4 | 2 | 2 | 0 | 11 | 4 | 1249.83 | 881.78 | 0 | 3 | 30 | 0 | 8.15 | 2.098018 | 271.55 | 5.604146 | 1 | 17.00 | 2.833213 | 555.60 | 6.320049 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 5.50 | 1.704748 | 155.0 | 5.043425 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 14 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
4407 | 9411-CNRRPX-2HW | 3 | 2.0 | 0 | 56 | 5 | July | 17 | 4 | 1 | 0 | 16 | 5 | 0 | 219 | 5.389072 | 5 | 7.0 | 5.212200 | 1.651002 | 10.117800 | 2.314296 | 0 | 2 | 0 | -1 | -1 | 1 | 5 | 0 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 19 | 4 | 2 | 1 | 0 | 46.1 | 3 | 0 | 1 | 1 | 1 | 15.0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 0 | 0 | 1 | 3 | 1 | 4 | 0 | 33 | 5 | 1 | 1 | 2 | 0 | 23 | 5 | 678.79 | 96.49 | 0 | 3 | 63 | 0 | 23.50 | 3.157000 | 1508.30 | 7.318738 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 38.2 | 3.642836 | 2324.95 | 7.751454 | 1 | 38.50 | 3.650658 | 2400.0 | 7.783224 | 1 | 23.7 | 3.165475 | 1406.55 | 7.248895 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 12 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 |
1044 | 1890-WDOXSL-5H1 | 2 | 3.0 | 0 | 20 | 2 | May | 10 | 1 | 6 | 1 | 3 | 2 | 0 | 19 | 2.944439 | 1 | 3.0 | 0.369930 | -0.994441 | 0.200070 | -1.609088 | 0 | 2 | 1 | 11 | 1 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 1 | 0 | 9.6 | 1 | 0 | 1 | 1 | 1 | 19.0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 5 | 0 | 0 | 0 | 3 | 3 | 3 | 0 | 2 | 2 | 4 | 3 | 1 | 0 | 2 | 2 | 362.36 | 163.95 | 1 | 3 | 24 | 0 | 3.75 | 1.321756 | 69.50 | 4.241327 | 0 | 0.00 | NaN | 0.00 | NaN | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 7.25 | 1.981001 | 175.0 | 5.164786 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 22 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
# Show the column labels of custdata_df (displayed as a pandas Index).
custdata_df.columns
Index(['custid', 'region', 'townsize', 'gender', 'age', 'agecat', 'birthmonth', 'ed', 'edcat', 'jobcat', ... 'owncd', 'ownpda', 'ownpc', 'ownipod', 'owngame', 'ownfax', 'news', 'response_01', 'response_02', 'response_03'], dtype='object', length=130)
# Build the target variable Y: total spend is the sum of the amount spent on
# the first card (cardspent) and on the second card (card2spent).
custdata_df['totalspend'] = custdata_df['cardspent'].add(custdata_df['card2spent'])
# Preview the first five rows, now including the new totalspend column.
custdata_df.head(5)
custid | region | townsize | gender | age | agecat | birthmonth | ed | edcat | jobcat | union | employ | empcat | retire | income | lninc | inccat | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | default | jobsat | marital | spoused | spousedcat | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | homeown | hometype | address | addresscat | cars | carown | cartype | carvalue | carcatvalue | carbought | carbuy | commute | commutecat | commutetime | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardbenefit | cardfee | cardtenure | cardtenurecat | card2 | card2type | card2benefit | card2fee | card2tenure | card2tenurecat | cardspent | card2spent | active | bfast | tenure | churn | longmon | lnlongmon | longten | lnlongten | tollfree | tollmon | lntollmon | tollten | lntollten | equip | equipmon | lnequipmon | equipten | lnequipten | callcard | cardmon | lncardmon | cardten | lncardten | wireless | wiremon | lnwiremon | wireten | lnwireten | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | hourstv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | totalspend | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3964-QJWTRG-NPN | 1 | 2.0 | 1 | 20 | 2 | September | 15 | 3 | 1 | 1 | 0 | 1 | 0 | 31 | 3.433987 | 2 | 11.1 | 1.200909 | 0.183079 | 2.240091 | 0.806516 | 1 | 1 | 0 | -1 | -1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 1 | 0 | 14.3 | 1 | 0 | 0 | 8 | 4 | 22.0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 9 | 6 | 1 | 0 | 1 | 3 | 1 | 1 | 0 | 2 | 2 | 5 | 3 | 1 | 0 | 3 | 2 | 81.66 | 67.80 | 0 | 3 | 5 | 1 | 6.50 | 1.871802 | 34.40 | 3.538057 | 1 | 29.0 | 3.367296 | 161.05 | 5.081715 | 1 | 29.50 | 3.384390 | 126.1 | 4.837075 | 1 | 14.25 | 2.656757 | 60.0 | 4.094345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 13 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 149.46 |
1 | 0648-AIPJSP-UVM | 5 | 5.0 | 0 | 22 | 2 | May | 17 | 4 | 2 | 0 | 0 | 1 | 0 | 15 | 2.708050 | 1 | 18.6 | 1.222020 | 0.200505 | 1.567980 | 0.449788 | 1 | 1 | 0 | -1 | -1 | 2 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 | 3 | 2 | 1 | 2 | 1 | 1 | 6.8 | 1 | 0 | 0 | 1 | 1 | 29.0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 9 | 4 | 1 | 0 | 0 | 2 | 4 | 1 | 0 | 4 | 2 | 4 | 1 | 3 | 0 | 4 | 2 | 42.60 | 34.94 | 1 | 1 | 39 | 0 | 8.90 | 2.186051 | 330.60 | 5.800909 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 54.85 | 4.004602 | 1975.0 | 7.588324 | 1 | 16.00 | 2.772589 | 610.0 | 6.413459 | 1 | 45.65 | 3.821004 | 1683.55 | 7.428660 | 1 | 1 | 1 | 4 | 1 | 0 | 1 | 0 | 1 | 1 | 18 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 77.54 |
2 | 5195-TLUDJE-HVO | 3 | 4.0 | 1 | 67 | 6 | June | 14 | 2 | 2 | 0 | 16 | 5 | 0 | 35 | 3.555348 | 2 | 9.9 | 0.928620 | -0.074056 | 2.536380 | 0.930738 | 0 | 4 | 1 | 13 | 2 | 3 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 30 | 5 | 3 | 1 | 1 | 18.8 | 1 | 0 | 1 | 4 | 3 | 24.0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 5 | 1 | 0 | 0 | 2 | 1 | 4 | 0 | 35 | 5 | 4 | 1 | 3 | 0 | 25 | 5 | 184.22 | 175.75 | 0 | 3 | 65 | 0 | 28.40 | 3.346389 | 1858.35 | 7.527444 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 23.00 | 3.135494 | 1410.0 | 7.251345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 21 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 359.97 |
3 | 4459-VLPQUH-3OL | 4 | 3.0 | 0 | 23 | 2 | May | 16 | 3 | 2 | 0 | 0 | 1 | 0 | 20 | 2.995732 | 1 | 5.7 | 0.022800 | -3.780995 | 1.117200 | 0.110826 | 1 | 2 | 1 | 18 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 3 | 2 | 3 | 1 | 1 | 8.7 | 1 | 0 | 1 | 1 | 1 | 38.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 0 | 0 | 0 | 2 | 1 | 4 | 0 | 5 | 2 | 3 | 2 | 4 | 0 | 5 | 2 | 340.99 | 18.42 | 1 | 1 | 36 | 0 | 6.00 | 1.791759 | 199.45 | 5.295564 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 21.00 | 3.044522 | 685.0 | 6.529419 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 26 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 359.41 |
4 | 8158-SMTQFB-CNO | 2 | 2.0 | 0 | 26 | 3 | July | 16 | 3 | 2 | 0 | 1 | 1 | 0 | 23 | 3.135494 | 1 | 1.7 | 0.214659 | -1.538705 | 0.176341 | -1.735336 | 0 | 1 | 1 | 13 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 2 | 1 | 0 | 1 | 10.6 | 1 | 0 | 1 | 6 | 3 | 32.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 8 | 3 | 1 | 3 | 2 | 0 | 9 | 3 | 255.10 | 252.73 | 1 | 3 | 21 | 0 | 3.05 | 1.115142 | 74.10 | 4.305416 | 1 | 16.5 | 2.803360 | 387.70 | 5.960232 | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 17.25 | 2.847812 | 360.0 | 5.886104 | 1 | 19.05 | 2.947067 | 410.80 | 6.018106 | 0 | 1 | 0 | 3 | 1 | 1 | 1 | 1 | 0 | 1 | 27 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 507.83 |
# Run pandas-profiling on custdata_df to produce the data-audit report
# (dataset summary, warnings and per-variable statistics) shown below.
# NOTE(review): the pandas_profiling package has since been renamed
# ydata-profiling -- confirm which one is installed in this environment.
import pandas_profiling
pandas_profiling.ProfileReport(custdata_df)
Dataset info
Number of variables | 131 |
---|---|
Number of observations | 5000 |
Total Missing (%) | 0.2% |
Total size in memory | 5.0 MiB |
Average record size in memory | 1.0 KiB |
Variables types
Numeric | 59 |
---|---|
Categorical | 1 |
Boolean | 49 |
Date | 0 |
Text (Unique) | 1 |
Rejected | 21 |
Unsupported | 0 |
Warnings
address has 245 / 4.9% zeros [Zeros]
addresscat is highly correlated with address (ρ = 0.92352) [Rejected]
agecat is highly correlated with age (ρ = 0.96988) [Rejected]
carbought has 2901 / 58.0% zeros [Zeros]
card2spent has 179 / 3.6% zeros [Zeros]
card2tenure is highly correlated with cardtenure (ρ = 0.96298) [Rejected]
card2tenurecat is highly correlated with card2tenure (ρ = 0.92439) [Rejected]
cardmon has 1419 / 28.4% zeros [Zeros]
cardten has 1420 / 28.4% zeros [Zeros]
cardtenure has 91 / 1.8% zeros [Zeros]
carown has 799 / 16.0% zeros [Zeros]
cars has 497 / 9.9% zeros [Zeros]
cartype has 2287 / 45.7% zeros [Zeros]
commutecat is highly correlated with commute (ρ = 0.98117) [Rejected]
edcat is highly correlated with ed (ρ = 0.9639) [Rejected]
employ has 659 / 13.2% zeros [Zeros]
equipmon is highly correlated with equip (ρ = 0.94051) [Rejected]
equipten has 3296 / 65.9% zeros [Zeros]
hourstv has 85 / 1.7% zeros [Zeros]
inccat is highly correlated with lninc (ρ = 0.94879) [Rejected]
internet has 2498 / 50.0% zeros [Zeros]
lncardmon is highly correlated with cardmon (ρ = 0.91687) [Rejected]
lncardten has 1422 / 28.4% missing values [Missing]
lnequipmon is highly correlated with equipmon (ρ = 0.97931) [Rejected]
lnequipten is highly correlated with lntollten (ρ = 0.96611) [Rejected]
lnlongten is highly correlated with lnlongmon (ρ = 0.92171) [Rejected]
lntollmon is highly correlated with tollmon (ρ = 0.93783) [Rejected]
lntollten is highly correlated with lnlongten (ρ = 0.93139) [Rejected]
lnwiremon is highly correlated with wiremon (ρ = 0.95389) [Rejected]
lnwireten is highly correlated with lnequipten (ρ = 0.98318) [Rejected]
longten is highly correlated with longmon (ρ = 0.9857) [Rejected]
pets has 1529 / 30.6% zeros [Zeros]
pets_birds has 4698 / 94.0% zeros [Zeros]
pets_cats has 3413 / 68.3% zeros [Zeros]
pets_dogs has 3762 / 75.2% zeros [Zeros]
pets_freshfish has 3462 / 69.2% zeros [Zeros]
pets_reptiles has 4818 / 96.4% zeros [Zeros]
pets_saltfish has 4942 / 98.8% zeros [Zeros]
pets_small has 4749 / 95.0% zeros [Zeros]
spoused is highly correlated with marital (ρ = 0.95577) [Rejected]
spousedcat is highly correlated with spoused (ρ = 0.98403) [Rejected]
tenure is highly correlated with card2tenure (ρ = 0.92824) [Rejected]
tollmon has 2622 / 52.4% zeros [Zeros]
tollten has 2622 / 52.4% zeros [Zeros]
totalspend is highly correlated with cardspent (ρ = 0.94149) [Rejected]
wiremon has 3656 / 73.1% zeros [Zeros]
wireten has 3656 / 73.1% zeros [Zeros]
active
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.466 |
---|
0 |
2670
|
---|---|
1 |
2330
|
Value | Count | Frequency (%) | |
0 | 2670 | 53.4% |
|
1 | 2330 | 46.6% |
|
address
Numeric
Distinct count | 57 |
---|---|
Unique (%) | 1.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 16.402 |
---|---|
Minimum | 0 |
Maximum | 57 |
Zeros (%) | 4.9% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 1 |
Q1 | 6 |
Median | 14 |
Q3 | 25 |
95-th percentile | 40 |
Maximum | 57 |
Range | 57 |
Interquartile range | 19 |
Descriptive statistics
Standard deviation | 12.397 |
---|---|
Coef of variation | 0.75583 |
Kurtosis | -0.22967 |
Mean | 16.402 |
MAD | 10.223 |
Skewness | 0.70655 |
Sum | 82012 |
Variance | 153.7 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 245 | 4.9% |
|
2 | 196 | 3.9% |
|
4 | 195 | 3.9% |
|
5 | 177 | 3.5% |
|
3 | 172 | 3.4% |
|
1 | 169 | 3.4% |
|
8 | 169 | 3.4% |
|
7 | 166 | 3.3% |
|
12 | 166 | 3.3% |
|
6 | 163 | 3.3% |
|
Other values (47) | 3182 | 63.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 245 | 4.9% |
|
1 | 169 | 3.4% |
|
2 | 196 | 3.9% |
|
3 | 172 | 3.4% |
|
4 | 195 | 3.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
52 | 7 | 0.1% |
|
53 | 6 | 0.1% |
|
54 | 1 | 0.0% |
|
55 | 5 | 0.1% |
|
57 | 3 | 0.1% |
|
addresscat
Highly correlated
This variable is highly correlated with address
and should be ignored for analysis
Correlation | 0.92352 |
---|
age
Numeric
Distinct count | 62 |
---|---|
Unique (%) | 1.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 47.026 |
---|---|
Minimum | 18 |
Maximum | 79 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 18 |
---|---|
5-th percentile | 20 |
Q1 | 31 |
Median | 47 |
Q3 | 62 |
95-th percentile | 76 |
Maximum | 79 |
Range | 61 |
Interquartile range | 31 |
Descriptive statistics
Standard deviation | 17.77 |
---|---|
Coef of variation | 0.37789 |
Kurtosis | -1.187 |
Mean | 47.026 |
MAD | 15.403 |
Skewness | 0.09076 |
Sum | 235128 |
Variance | 315.78 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
18 | 106 | 2.1% |
|
35 | 102 | 2.0% |
|
37 | 98 | 2.0% |
|
24 | 97 | 1.9% |
|
21 | 95 | 1.9% |
|
63 | 95 | 1.9% |
|
31 | 94 | 1.9% |
|
57 | 93 | 1.9% |
|
25 | 93 | 1.9% |
|
36 | 92 | 1.8% |
|
Other values (52) | 4035 | 80.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
18 | 106 | 2.1% |
|
19 | 78 | 1.6% |
|
20 | 80 | 1.6% |
|
21 | 95 | 1.9% |
|
22 | 82 | 1.6% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
75 | 74 | 1.5% |
|
76 | 58 | 1.2% |
|
77 | 71 | 1.4% |
|
78 | 70 | 1.4% |
|
79 | 73 | 1.5% |
|
agecat
Highly correlated
This variable is highly correlated with age
and should be ignored for analysis
Correlation | 0.96988 |
---|
bfast
Numeric
Distinct count | 3 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.0586 |
---|---|
Minimum | 1 |
Maximum | 3 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 2 |
Q3 | 3 |
95-th percentile | 3 |
Maximum | 3 |
Range | 2 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 0.82952 |
---|---|
Coef of variation | 0.40295 |
Kurtosis | -1.5385 |
Mean | 2.0586 |
MAD | 0.70605 |
Skewness | -0.10964 |
Sum | 10293 |
Variance | 0.6881 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3 | 1875 | 37.5% |
|
1 | 1582 | 31.6% |
|
2 | 1543 | 30.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1582 | 31.6% |
|
2 | 1543 | 30.9% |
|
3 | 1875 | 37.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1582 | 31.6% |
|
2 | 1543 | 30.9% |
|
3 | 1875 | 37.5% |
|
birthmonth
Categorical
Distinct count | 12 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
September |
|
---|---|
May |
|
January |
|
Other values (9) |
3671
|
Value | Count | Frequency (%) | |
September | 458 | 9.2% |
|
May | 451 | 9.0% |
|
January | 420 | 8.4% |
|
June | 420 | 8.4% |
|
February | 418 | 8.4% |
|
March | 416 | 8.3% |
|
July | 413 | 8.3% |
|
October | 410 | 8.2% |
|
August | 406 | 8.1% |
|
November | 399 | 8.0% |
|
Other values (2) | 789 | 15.8% |
|
callcard
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.7162 |
---|
1 |
3581
|
---|---|
0 |
1419
|
Value | Count | Frequency (%) | |
1 | 3581 | 71.6% |
|
0 | 1419 | 28.4% |
|
callid
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4752 |
---|
0 |
2624
|
---|---|
1 |
2376
|
Value | Count | Frequency (%) | |
0 | 2624 | 52.5% |
|
1 | 2376 | 47.5% |
|
callwait
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.479 |
---|
0 |
2605
|
---|---|
1 |
2395
|
Value | Count | Frequency (%) | |
0 | 2605 | 52.1% |
|
1 | 2395 | 47.9% |
|
carbought
Numeric
Distinct count | 3 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.221 |
---|---|
Minimum | -1 |
Maximum | 1 |
Zeros (%) | 58.0% |
Quantile statistics
Minimum | -1 |
---|---|
5-th percentile | -1 |
Q1 | 0 |
Median | 0 |
Q3 | 1 |
95-th percentile | 1 |
Maximum | 1 |
Range | 2 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.60912 |
---|---|
Coef of variation | 2.7562 |
Kurtosis | -0.5264 |
Mean | 0.221 |
MAD | 0.49918 |
Skewness | -0.15823 |
Sum | 1105 |
Variance | 0.37103 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 2901 | 58.0% |
|
1 | 1602 | 32.0% |
|
-1 | 497 | 9.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 2901 | 58.0% |
|
1 | 1602 | 32.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 2901 | 58.0% |
|
1 | 1602 | 32.0% |
|
carbuy
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.361 |
---|
0 |
3195
|
---|---|
1 |
1805
|
Value | Count | Frequency (%) | |
0 | 3195 | 63.9% |
|
1 | 1805 | 36.1% |
|
carcatvalue
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.3894 |
---|---|
Minimum | -1 |
Maximum | 3 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -1 |
---|---|
5-th percentile | -1 |
Q1 | 1 |
Median | 1 |
Q3 | 2 |
95-th percentile | 3 |
Maximum | 3 |
Range | 4 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 1.0813 |
---|---|
Coef of variation | 0.77825 |
Kurtosis | 0.23064 |
Mean | 1.3894 |
MAD | 0.84868 |
Skewness | -0.49643 |
Sum | 6947 |
Variance | 1.1692 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1 | 2399 | 48.0% |
|
2 | 1267 | 25.3% |
|
3 | 837 | 16.7% |
|
-1 | 497 | 9.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
1 | 2399 | 48.0% |
|
2 | 1267 | 25.3% |
|
3 | 837 | 16.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
1 | 2399 | 48.0% |
|
2 | 1267 | 25.3% |
|
3 | 837 | 16.7% |
|
card
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.7142 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 4 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.1849 |
---|---|
Coef of variation | 0.43656 |
Kurtosis | -1.1112 |
Mean | 2.7142 |
MAD | 1.0323 |
Skewness | 0.015333 |
Sum | 13571 |
Variance | 1.404 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4 | 1344 | 26.9% |
|
2 | 1247 | 24.9% |
|
3 | 1200 | 24.0% |
|
1 | 986 | 19.7% |
|
5 | 223 | 4.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 986 | 19.7% |
|
2 | 1247 | 24.9% |
|
3 | 1200 | 24.0% |
|
4 | 1344 | 26.9% |
|
5 | 223 | 4.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 986 | 19.7% |
|
2 | 1247 | 24.9% |
|
3 | 1200 | 24.0% |
|
4 | 1344 | 26.9% |
|
5 | 223 | 4.5% |
|
card2
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.7744 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.1734 |
---|---|
Coef of variation | 0.42296 |
Kurtosis | -0.91791 |
Mean | 2.7744 |
MAD | 0.99139 |
Skewness | 0.084736 |
Sum | 13872 |
Variance | 1.377 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3 | 1384 | 27.7% |
|
2 | 1301 | 26.0% |
|
4 | 1141 | 22.8% |
|
1 | 829 | 16.6% |
|
5 | 345 | 6.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 829 | 16.6% |
|
2 | 1301 | 26.0% |
|
3 | 1384 | 27.7% |
|
4 | 1141 | 22.8% |
|
5 | 345 | 6.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 829 | 16.6% |
|
2 | 1301 | 26.0% |
|
3 | 1384 | 27.7% |
|
4 | 1141 | 22.8% |
|
5 | 345 | 6.9% |
|
card2benefit
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.534 |
---|---|
Minimum | 1 |
Maximum | 4 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 4 |
Maximum | 4 |
Range | 3 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.1173 |
---|---|
Coef of variation | 0.44091 |
Kurtosis | -1.3562 |
Mean | 2.534 |
MAD | 0.99851 |
Skewness | -0.046519 |
Sum | 12670 |
Variance | 1.2483 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4 | 1294 | 25.9% |
|
3 | 1286 | 25.7% |
|
2 | 1216 | 24.3% |
|
1 | 1204 | 24.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1204 | 24.1% |
|
2 | 1216 | 24.3% |
|
3 | 1286 | 25.7% |
|
4 | 1294 | 25.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1204 | 24.1% |
|
2 | 1216 | 24.3% |
|
3 | 1286 | 25.7% |
|
4 | 1294 | 25.9% |
|
card2fee
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1872 |
---|
0 |
4064
|
---|---|
1 |
936
|
Value | Count | Frequency (%) | |
0 | 4064 | 81.3% |
|
1 | 936 | 18.7% |
|
card2spent
Numeric
Distinct count | 4477 |
---|---|
Unique (%) | 89.5% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 160.88 |
---|---|
Minimum | 0 |
Maximum | 2069.2 |
Zeros (%) | 3.6% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 14.819 |
Q1 | 66.968 |
Median | 125.34 |
Q3 | 208.31 |
95-th percentile | 419.45 |
Maximum | 2069.2 |
Range | 2069.2 |
Interquartile range | 141.34 |
Descriptive statistics
Standard deviation | 146.29 |
---|---|
Coef of variation | 0.90935 |
Kurtosis | 15.736 |
Mean | 160.88 |
MAD | 100.44 |
Skewness | 2.8012 |
Sum | 804380 |
Variance | 21402 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 179 | 3.6% |
|
63.690000000000005 | 3 | 0.1% |
|
92.92 | 3 | 0.1% |
|
175.75 | 3 | 0.1% |
|
97.87 | 3 | 0.1% |
|
112.88 | 3 | 0.1% |
|
128.54 | 3 | 0.1% |
|
159.1 | 3 | 0.1% |
|
38.410000000000004 | 3 | 0.1% |
|
128.35 | 3 | 0.1% |
|
Other values (4467) | 4794 | 95.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 179 | 3.6% |
|
6.1000000000000005 | 1 | 0.0% |
|
6.54 | 1 | 0.0% |
|
6.86 | 1 | 0.0% |
|
7.140000000000001 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1277.68 | 1 | 0.0% |
|
1282.76 | 1 | 0.0% |
|
1309.3700000000001 | 1 | 0.0% |
|
1611.3500000000001 | 1 | 0.0% |
|
2069.25 | 1 | 0.0% |
|
card2tenure
Highly correlated
This variable is highly correlated with cardtenure
and should be ignored for analysis
Correlation | 0.96298 |
---|
card2tenurecat
Highly correlated
This variable is highly correlated with card2tenure
and should be ignored for analysis
Correlation | 0.92439 |
---|
card2type
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.5412 |
---|---|
Minimum | 1 |
Maximum | 4 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 4 |
Maximum | 4 |
Range | 3 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.1188 |
---|---|
Coef of variation | 0.44027 |
Kurtosis | -1.3601 |
Mean | 2.5412 |
MAD | 1.0003 |
Skewness | -0.04748 |
Sum | 12706 |
Variance | 1.2518 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4 | 1319 | 26.4% |
|
3 | 1257 | 25.1% |
|
2 | 1235 | 24.7% |
|
1 | 1189 | 23.8% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1189 | 23.8% |
|
2 | 1235 | 24.7% |
|
3 | 1257 | 25.1% |
|
4 | 1319 | 26.4% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1189 | 23.8% |
|
2 | 1235 | 24.7% |
|
3 | 1257 | 25.1% |
|
4 | 1319 | 26.4% |
|
cardbenefit
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.5058 |
---|---|
Minimum | 1 |
Maximum | 4 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 3.25 |
95-th percentile | 4 |
Maximum | 4 |
Range | 3 |
Interquartile range | 1.25 |
Descriptive statistics
Standard deviation | 1.1172 |
---|---|
Coef of variation | 0.44586 |
Kurtosis | -1.3579 |
Mean | 2.5058 |
MAD | 0.99894 |
Skewness | -0.012388 |
Sum | 12529 |
Variance | 1.2482 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3 | 1274 | 25.5% |
|
4 | 1250 | 25.0% |
|
1 | 1245 | 24.9% |
|
2 | 1231 | 24.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1245 | 24.9% |
|
2 | 1231 | 24.6% |
|
3 | 1274 | 25.5% |
|
4 | 1250 | 25.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1245 | 24.9% |
|
2 | 1231 | 24.6% |
|
3 | 1274 | 25.5% |
|
4 | 1250 | 25.0% |
|
cardfee
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1898 |
---|
0 |
4051
|
---|---|
1 |
949
|
Value | Count | Frequency (%) | |
0 | 4051 | 81.0% |
|
1 | 949 | 19.0% |
|
cardmon
Numeric
Distinct count | 271 |
---|---|
Unique (%) | 5.4% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 15.444 |
---|---|
Minimum | 0 |
Maximum | 188.5 |
Zeros (%) | 28.4% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 13.75 |
Q3 | 22.75 |
95-th percentile | 42 |
Maximum | 188.5 |
Range | 188.5 |
Interquartile range | 22.75 |
Descriptive statistics
Standard deviation | 15.008 |
---|---|
Coef of variation | 0.97175 |
Kurtosis | 7.1671 |
Mean | 15.444 |
MAD | 11.245 |
Skewness | 1.6877 |
Sum | 77219 |
Variance | 225.23 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 1419 | 28.4% |
|
13.25 | 53 | 1.1% |
|
11.5 | 52 | 1.0% |
|
16.5 | 49 | 1.0% |
|
16.25 | 49 | 1.0% |
|
13.75 | 47 | 0.9% |
|
18.25 | 45 | 0.9% |
|
13.5 | 45 | 0.9% |
|
14.25 | 44 | 0.9% |
|
15.0 | 44 | 0.9% |
|
Other values (261) | 3153 | 63.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 1419 | 28.4% |
|
3.25 | 1 | 0.0% |
|
3.75 | 1 | 0.0% |
|
4.0 | 3 | 0.1% |
|
4.25 | 9 | 0.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
100.25 | 1 | 0.0% |
|
102.0 | 1 | 0.0% |
|
104.5 | 1 | 0.0% |
|
138.25 | 1 | 0.0% |
|
188.5 | 1 | 0.0% |
|
cardspent
Numeric
Distinct count | 4760 |
---|---|
Unique (%) | 95.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 337.2 |
---|---|
Minimum | 0 |
Maximum | 3926.4 |
Zeros (%) | 0.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 91.305 |
Q1 | 183.38 |
Median | 276.36 |
Q3 | 418.54 |
95-th percentile | 782.32 |
Maximum | 3926.4 |
Range | 3926.4 |
Interquartile range | 235.16 |
Descriptive statistics
Standard deviation | 245.15 |
---|---|
Coef of variation | 0.727 |
Kurtosis | 21.44 |
Mean | 337.2 |
MAD | 167.79 |
Skewness | 3.0512 |
Sum | 1686000 |
Variance | 60096 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 7 | 0.1% |
|
186.91 | 4 | 0.1% |
|
245.84 | 3 | 0.1% |
|
321.19 | 3 | 0.1% |
|
231.14000000000001 | 3 | 0.1% |
|
202.31 | 3 | 0.1% |
|
237.16 | 3 | 0.1% |
|
412.99 | 3 | 0.1% |
|
122.54 | 3 | 0.1% |
|
249.0 | 3 | 0.1% |
|
Other values (4750) | 4965 | 99.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 7 | 0.1% |
|
6.97 | 1 | 0.0% |
|
7.34 | 1 | 0.0% |
|
7.53 | 1 | 0.0% |
|
8.11 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
2461.03 | 1 | 0.0% |
|
2503.25 | 1 | 0.0% |
|
2969.39 | 1 | 0.0% |
|
3104.63 | 1 | 0.0% |
|
3926.41 | 1 | 0.0% |
|
cardten
Numeric
Distinct count | 698 |
---|---|
Unique (%) | 14.0% |
Missing (%) | 0.0% |
Missing (n) | 2 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 720.48 |
---|---|
Minimum | 0 |
Maximum | 13705 |
Zeros (%) | 28.4% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 425 |
Q3 | 1080 |
95-th percentile | 2455.7 |
Maximum | 13705 |
Range | 13705 |
Interquartile range | 1080 |
Descriptive statistics
Standard deviation | 922.23 |
---|---|
Coef of variation | 1.28 |
Kurtosis | 15.163 |
Mean | 720.48 |
MAD | 667.37 |
Skewness | 2.6459 |
Sum | 3601000 |
Variance | 850500 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 1420 | 28.4% |
|
590.0 | 21 | 0.4% |
|
200.0 | 20 | 0.4% |
|
380.0 | 20 | 0.4% |
|
45.0 | 19 | 0.4% |
|
195.0 | 19 | 0.4% |
|
500.0 | 19 | 0.4% |
|
330.0 | 18 | 0.4% |
|
220.0 | 18 | 0.4% |
|
435.0 | 18 | 0.4% |
|
Other values (687) | 3406 | 68.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 1420 | 28.4% |
|
4.75 | 1 | 0.0% |
|
5.0 | 17 | 0.3% |
|
5.25 | 1 | 0.0% |
|
7.75 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
6440.0 | 1 | 0.0% |
|
7115.0 | 1 | 0.0% |
|
7310.0 | 1 | 0.0% |
|
9920.0 | 1 | 0.0% |
|
13705.0 | 1 | 0.0% |
|
cardtenure
Numeric
Distinct count | 41 |
---|---|
Unique (%) | 0.8% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 16.656 |
---|---|
Minimum | 0 |
Maximum | 40 |
Zeros (%) | 1.8% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 1 |
Q1 | 6 |
Median | 14 |
Q3 | 26 |
95-th percentile | 38 |
Maximum | 40 |
Range | 40 |
Interquartile range | 20 |
Descriptive statistics
Standard deviation | 12.021 |
---|---|
Coef of variation | 0.72173 |
Kurtosis | -1.0561 |
Mean | 16.656 |
MAD | 10.355 |
Skewness | 0.42936 |
Sum | 83279 |
Variance | 144.5 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3 | 246 | 4.9% |
|
1 | 228 | 4.6% |
|
2 | 220 | 4.4% |
|
4 | 193 | 3.9% |
|
5 | 188 | 3.8% |
|
6 | 176 | 3.5% |
|
7 | 163 | 3.3% |
|
11 | 158 | 3.2% |
|
8 | 158 | 3.2% |
|
9 | 153 | 3.1% |
|
Other values (31) | 3117 | 62.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 91 | 1.8% |
|
1 | 228 | 4.6% |
|
2 | 220 | 4.4% |
|
3 | 246 | 4.9% |
|
4 | 193 | 3.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
36 | 72 | 1.4% |
|
37 | 83 | 1.7% |
|
38 | 98 | 2.0% |
|
39 | 113 | 2.3% |
|
40 | 126 | 2.5% |
|
cardtenurecat
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.7822 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 3 |
Median | 4 |
Q3 | 5 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.3538 |
---|---|
Coef of variation | 0.35794 |
Kurtosis | -1.0266 |
Mean | 3.7822 |
MAD | 1.2057 |
Skewness | -0.62824 |
Sum | 18911 |
Variance | 1.8327 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
5 | 2351 | 47.0% |
|
2 | 847 | 16.9% |
|
3 | 789 | 15.8% |
|
4 | 694 | 13.9% |
|
1 | 319 | 6.4% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 319 | 6.4% |
|
2 | 847 | 16.9% |
|
3 | 789 | 15.8% |
|
4 | 694 | 13.9% |
|
5 | 2351 | 47.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 319 | 6.4% |
|
2 | 847 | 16.9% |
|
3 | 789 | 15.8% |
|
4 | 694 | 13.9% |
|
5 | 2351 | 47.0% |
|
cardtype
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.507 |
---|---|
Minimum | 1 |
Maximum | 4 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 4 |
Maximum | 4 |
Range | 3 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.1185 |
---|---|
Coef of variation | 0.44614 |
Kurtosis | -1.3608 |
Mean | 2.507 |
MAD | 1.0004 |
Skewness | -0.0098086 |
Sum | 12535 |
Variance | 1.251 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4 | 1260 | 25.2% |
|
3 | 1257 | 25.1% |
|
1 | 1242 | 24.8% |
|
2 | 1241 | 24.8% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1242 | 24.8% |
|
2 | 1241 | 24.8% |
|
3 | 1257 | 25.1% |
|
4 | 1260 | 25.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1242 | 24.8% |
|
2 | 1241 | 24.8% |
|
3 | 1257 | 25.1% |
|
4 | 1260 | 25.2% |
|
carown
Numeric
Distinct count | 3 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.6414 |
---|---|
Minimum | -1 |
Maximum | 1 |
Zeros (%) | 16.0% |
Quantile statistics
Minimum | -1 |
---|---|
5-th percentile | -1 |
Q1 | 0 |
Median | 1 |
Q3 | 1 |
95-th percentile | 1 |
Maximum | 1 |
Range | 2 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.6549 |
---|---|
Coef of variation | 1.021 |
Kurtosis | 1.14 |
Mean | 0.6414 |
MAD | 0.5313 |
Skewness | -1.5944 |
Sum | 3207 |
Variance | 0.42889 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1 | 3704 | 74.1% |
|
0 | 799 | 16.0% |
|
-1 | 497 | 9.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 799 | 16.0% |
|
1 | 3704 | 74.1% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 799 | 16.0% |
|
1 | 3704 | 74.1% |
|
cars
Numeric
Distinct count | 9 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.1306 |
---|---|
Minimum | 0 |
Maximum | 8 |
Zeros (%) | 9.9% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 1 |
Median | 2 |
Q3 | 3 |
95-th percentile | 4 |
Maximum | 8 |
Range | 8 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.3075 |
---|---|
Coef of variation | 0.61366 |
Kurtosis | 0.32839 |
Mean | 2.1306 |
MAD | 1.0136 |
Skewness | 0.50172 |
Sum | 10653 |
Variance | 1.7095 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
2 | 1607 | 32.1% |
|
1 | 1119 | 22.4% |
|
3 | 1082 | 21.6% |
|
0 | 497 | 9.9% |
|
4 | 481 | 9.6% |
|
5 | 149 | 3.0% |
|
6 | 51 | 1.0% |
|
7 | 13 | 0.3% |
|
8 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 497 | 9.9% |
|
1 | 1119 | 22.4% |
|
2 | 1607 | 32.1% |
|
3 | 1082 | 21.6% |
|
4 | 481 | 9.6% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4 | 481 | 9.6% |
|
5 | 149 | 3.0% |
|
6 | 51 | 1.0% |
|
7 | 13 | 0.3% |
|
8 | 1 | 0.0% |
|
cartype
Numeric
Distinct count | 3 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.3438 |
---|---|
Minimum | -1 |
Maximum | 1 |
Zeros (%) | 45.7% |
Quantile statistics
Minimum | -1 |
---|---|
5-th percentile | -1 |
Q1 | 0 |
Median | 0 |
Q3 | 1 |
95-th percentile | 1 |
Maximum | 1 |
Range | 2 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.65153 |
---|---|
Coef of variation | 1.8951 |
Kurtosis | -0.70821 |
Mean | 0.3438 |
MAD | 0.58166 |
Skewness | -0.48685 |
Sum | 1719 |
Variance | 0.42449 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 2287 | 45.7% |
|
1 | 2216 | 44.3% |
|
-1 | 497 | 9.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 2287 | 45.7% |
|
1 | 2216 | 44.3% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
-1 | 497 | 9.9% |
|
0 | 2287 | 45.7% |
|
1 | 2216 | 44.3% |
|
carvalue
Numeric
Distinct count | 801 |
---|---|
Unique (%) | 16.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 23.233 |
---|---|
Minimum | -1 |
Maximum | 99.6 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -1 |
---|---|
5-th percentile | -1 |
Q1 | 9.2 |
Median | 17 |
Q3 | 31.1 |
95-th percentile | 72 |
Maximum | 99.6 |
Range | 100.6 |
Interquartile range | 21.9 |
Descriptive statistics
Standard deviation | 21.232 |
---|---|
Coef of variation | 0.91387 |
Kurtosis | 1.9517 |
Mean | 23.233 |
MAD | 15.904 |
Skewness | 1.474 |
Sum | 116160 |
Variance | 450.78 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
-1.0 | 497 | 9.9% |
|
9.8 | 25 | 0.5% |
|
13.5 | 24 | 0.5% |
|
6.300000000000001 | 24 | 0.5% |
|
10.200000000000001 | 23 | 0.5% |
|
13.0 | 23 | 0.5% |
|
11.4 | 22 | 0.4% |
|
9.1 | 22 | 0.4% |
|
9.200000000000001 | 22 | 0.4% |
|
9.9 | 22 | 0.4% |
|
Other values (791) | 4296 | 85.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-1.0 | 497 | 9.9% |
|
2.2 | 1 | 0.0% |
|
2.3000000000000003 | 1 | 0.0% |
|
2.4000000000000004 | 1 | 0.0% |
|
2.5 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
98.2 | 1 | 0.0% |
|
98.5 | 4 | 0.1% |
|
98.80000000000001 | 1 | 0.0% |
|
99.2 | 1 | 0.0% |
|
99.60000000000001 | 1 | 0.0% |
|
churn
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2532 |
---|
0 |
3734
|
---|---|
1 |
1266
|
Value | Count | Frequency (%) | |
0 | 3734 | 74.7% |
|
1 | 1266 | 25.3% |
|
commute
Numeric
Distinct count | 10 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.9962 |
---|---|
Minimum | 1 |
Maximum | 10 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 1 |
Q3 | 4 |
95-th percentile | 8 |
Maximum | 10 |
Range | 9 |
Interquartile range | 3 |
Descriptive statistics
Standard deviation | 2.7435 |
---|---|
Coef of variation | 0.91567 |
Kurtosis | -0.045572 |
Mean | 2.9962 |
MAD | 2.2996 |
Skewness | 1.1277 |
Sum | 14981 |
Variance | 7.5269 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1 | 2855 | 57.1% |
|
4 | 635 | 12.7% |
|
8 | 585 | 11.7% |
|
5 | 302 | 6.0% |
|
3 | 295 | 5.9% |
|
10 | 153 | 3.1% |
|
7 | 56 | 1.1% |
|
2 | 50 | 1.0% |
|
6 | 44 | 0.9% |
|
9 | 25 | 0.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 2855 | 57.1% |
|
2 | 50 | 1.0% |
|
3 | 295 | 5.9% |
|
4 | 635 | 12.7% |
|
5 | 302 | 6.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
6 | 44 | 0.9% |
|
7 | 56 | 1.1% |
|
8 | 585 | 11.7% |
|
9 | 25 | 0.5% |
|
10 | 153 | 3.1% |
|
commutebike
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1234 |
---|
0 |
4383
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4383 | 87.7% |
|
1 | 617 | 12.3% |
|
commutebus
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.406 |
---|
0 |
2970
|
---|---|
1 |
2030
|
Value | Count | Frequency (%) | |
0 | 2970 | 59.4% |
|
1 | 2030 | 40.6% |
|
commutecar
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.679 |
---|
1 |
3395
|
---|---|
0 |
1605
|
Value | Count | Frequency (%) | |
1 | 3395 | 67.9% |
|
0 | 1605 | 32.1% |
|
commutecarpool
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2718 |
---|
0 |
3641
|
---|---|
1 |
1359
|
Value | Count | Frequency (%) | |
0 | 3641 | 72.8% |
|
1 | 1359 | 27.2% |
|
commutecat
Highly correlated
This variable is highly correlated with commute
and should be ignored for analysis
Correlation | 0.98117 |
---|
commutemotorcycle
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1026 |
---|
0 |
4487
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4487 | 89.7% |
|
1 | 513 | 10.3% |
|
commutenonmotor
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0584 |
---|
0 |
4708
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4708 | 94.2% |
|
1 | 292 | 5.8% |
|
commutepublic
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0954 |
---|
0 |
4523
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4523 | 90.5% |
|
1 | 477 | 9.5% |
|
commuterail
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2746 |
---|
0 |
3627
|
---|---|
1 |
1373
|
Value | Count | Frequency (%) | |
0 | 3627 | 72.5% |
|
1 | 1373 | 27.5% |
|
commutetime
Numeric
Distinct count | 42 |
---|---|
Unique (%) | 0.8% |
Missing (%) | 0.0% |
Missing (n) | 2 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 25.346 |
---|---|
Minimum | 8 |
Maximum | 48 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 8 |
---|---|
5-th percentile | 16 |
Q1 | 21 |
Median | 25 |
Q3 | 29 |
95-th percentile | 35 |
Maximum | 48 |
Range | 40 |
Interquartile range | 8 |
Descriptive statistics
Standard deviation | 5.8791 |
---|---|
Coef of variation | 0.23196 |
Kurtosis | 0.13487 |
Mean | 25.346 |
MAD | 4.6895 |
Skewness | 0.29028 |
Sum | 126680 |
Variance | 34.564 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
24.0 | 336 | 6.7% |
|
23.0 | 335 | 6.7% |
|
27.0 | 331 | 6.6% |
|
25.0 | 330 | 6.6% |
|
22.0 | 325 | 6.5% |
|
26.0 | 311 | 6.2% |
|
21.0 | 307 | 6.1% |
|
28.0 | 293 | 5.9% |
|
29.0 | 260 | 5.2% |
|
30.0 | 226 | 4.5% |
|
Other values (31) | 1944 | 38.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
8.0 | 1 | 0.0% |
|
9.0 | 6 | 0.1% |
|
10.0 | 4 | 0.1% |
|
11.0 | 9 | 0.2% |
|
12.0 | 22 | 0.4% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
44.0 | 4 | 0.1% |
|
45.0 | 4 | 0.1% |
|
46.0 | 6 | 0.1% |
|
47.0 | 1 | 0.0% |
|
48.0 | 1 | 0.0% |
|
commutewalk
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.3838 |
---|
0 |
3081
|
---|---|
1 |
1919
|
Value | Count | Frequency (%) | |
0 | 3081 | 61.6% |
|
1 | 1919 | 38.4% |
|
confer
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.478 |
---|
0 |
2610
|
---|---|
1 |
2390
|
Value | Count | Frequency (%) | |
0 | 2610 | 52.2% |
|
1 | 2390 | 47.8% |
|
creddebt
Numeric
Distinct count | 4950 |
---|---|
Unique (%) | 99.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.8573 |
---|---|
Minimum | 0 |
Maximum | 109.07 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.10109 |
Q1 | 0.38552 |
Median | 0.92644 |
Q3 | 2.0638 |
95-th percentile | 6.373 |
Maximum | 109.07 |
Range | 109.07 |
Interquartile range | 1.6783 |
Descriptive statistics
Standard deviation | 3.4157 |
---|---|
Coef of variation | 1.8391 |
Kurtosis | 248.53 |
Mean | 1.8573 |
MAD | 1.669 |
Skewness | 11.12 |
Sum | 9286.6 |
Variance | 11.667 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.272 | 2 | 0.0% |
|
0.23587200000000003 | 2 | 0.0% |
|
0.17171999999999998 | 2 | 0.0% |
|
0.31600799999999996 | 2 | 0.0% |
|
1.6744 | 2 | 0.0% |
|
1.254 | 2 | 0.0% |
|
0.658368 | 2 | 0.0% |
|
0.129778 | 2 | 0.0% |
|
0.4984199999999999 | 2 | 0.0% |
|
0.66528 | 2 | 0.0% |
|
Other values (4940) | 4980 | 99.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 1 | 0.0% |
|
0.001364 | 1 | 0.0% |
|
0.00341 | 1 | 0.0% |
|
0.00494 | 1 | 0.0% |
|
0.006344 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
42.0985 | 1 | 0.0% |
|
44.245560000000005 | 1 | 0.0% |
|
48.704524 | 1 | 0.0% |
|
67.49085 | 1 | 0.0% |
|
109.072596 | 1 | 0.0% |
|
custid
Categorical, Unique
First 3 values |
---|
0394-AVUMJX-JAH |
0191-VKRHCM-922 |
9844-XTDOZB-DSM |
Last 3 values |
---|
6015-ASUOWY-VXJ |
7186-WTDJGD-F2K |
6289-YVKMBB-CXK |
First 10 values
Value | Count | Frequency (%) | |
0002-GTOKLU-YVY | 1 | 0.0% |
|
0003-RLTRGE-IW2 | 1 | 0.0% |
|
0003-UTGKPR-PRU | 1 | 0.0% |
|
0008-ZIQQOT-SGB | 1 | 0.0% |
|
0012-CIVYLF-839 | 1 | 0.0% |
|
Last 10 values
Value | Count | Frequency (%) | |
9991-FCIBKT-W29 | 1 | 0.0% |
|
9992-FSFJPL-5D6 | 1 | 0.0% |
|
9997-QIXKNU-54A | 1 | 0.0% |
|
9998-ZGHXLK-EOT | 1 | 0.0% |
|
9999-EGLHVE-19G | 1 | 0.0% |
|
debtinc
Numeric
Distinct count | 325 |
---|---|
Unique (%) | 6.5% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 9.9542 |
---|---|
Minimum | 0 |
Maximum | 43.1 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 1.9 |
Q1 | 5.1 |
Median | 8.8 |
Q3 | 13.6 |
95-th percentile | 22.2 |
Maximum | 43.1 |
Range | 43.1 |
Interquartile range | 8.5 |
Descriptive statistics
Standard deviation | 6.3998 |
---|---|
Coef of variation | 0.64293 |
Kurtosis | 1.3765 |
Mean | 9.9542 |
MAD | 5.0211 |
Skewness | 1.0619 |
Sum | 49771 |
Variance | 40.957 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
7.000000000000001 | 48 | 1.0% |
|
6.9 | 46 | 0.9% |
|
4.1000000000000005 | 46 | 0.9% |
|
5.4 | 45 | 0.9% |
|
4.3999999999999995 | 42 | 0.8% |
|
6.6000000000000005 | 42 | 0.8% |
|
7.3 | 41 | 0.8% |
|
5.8999999999999995 | 39 | 0.8% |
|
11.3 | 39 | 0.8% |
|
5.5 | 38 | 0.8% |
|
Other values (315) | 4574 | 91.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 1 | 0.0% |
|
0.1 | 6 | 0.1% |
|
0.2 | 2 | 0.0% |
|
0.3 | 5 | 0.1% |
|
0.4 | 6 | 0.1% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
38.2 | 1 | 0.0% |
|
40.699999999999996 | 1 | 0.0% |
|
41.0 | 1 | 0.0% |
|
41.699999999999996 | 1 | 0.0% |
|
43.1 | 1 | 0.0% |
|
default
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2342 |
---|
0 |
3829
|
---|---|
1 |
1171
|
Value | Count | Frequency (%) | |
0 | 3829 | 76.6% |
|
1 | 1171 | 23.4% |
|
ebill
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.3486 |
---|
0 |
3257
|
---|---|
1 |
1743
|
Value | Count | Frequency (%) | |
0 | 3257 | 65.1% |
|
1 | 1743 | 34.9% |
|
ed
Numeric
Distinct count | 18 |
---|---|
Unique (%) | 0.4% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 14.543 |
---|---|
Minimum | 6 |
Maximum | 23 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 6 |
---|---|
5-th percentile | 9 |
Q1 | 12 |
Median | 14 |
Q3 | 17 |
95-th percentile | 20 |
Maximum | 23 |
Range | 17 |
Interquartile range | 5 |
Descriptive statistics
Standard deviation | 3.2811 |
---|---|
Coef of variation | 0.22561 |
Kurtosis | -0.60706 |
Mean | 14.543 |
MAD | 2.7074 |
Skewness | 0.0037335 |
Sum | 72715 |
Variance | 10.766 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
14 | 569 | 11.4% |
|
15 | 536 | 10.7% |
|
13 | 531 | 10.6% |
|
16 | 486 | 9.7% |
|
12 | 467 | 9.3% |
|
17 | 454 | 9.1% |
|
11 | 362 | 7.2% |
|
18 | 349 | 7.0% |
|
19 | 308 | 6.2% |
|
10 | 260 | 5.2% |
|
Other values (8) | 678 | 13.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
6 | 8 | 0.2% |
|
7 | 31 | 0.6% |
|
8 | 107 | 2.1% |
|
9 | 178 | 3.6% |
|
10 | 260 | 5.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
19 | 308 | 6.2% |
|
20 | 206 | 4.1% |
|
21 | 111 | 2.2% |
|
22 | 33 | 0.7% |
|
23 | 4 | 0.1% |
|
edcat
Highly correlated
This variable is highly correlated with ed
and should be ignored for analysis
Correlation | 0.9639 |
---|
empcat
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.9326 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.4533 |
---|---|
Coef of variation | 0.49557 |
Kurtosis | -1.3354 |
Mean | 2.9326 |
MAD | 1.2503 |
Skewness | 0.15135 |
Sum | 14663 |
Variance | 2.1121 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
2 | 1180 | 23.6% |
|
5 | 1135 | 22.7% |
|
1 | 1048 | 21.0% |
|
3 | 968 | 19.4% |
|
4 | 669 | 13.4% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1048 | 21.0% |
|
2 | 1180 | 23.6% |
|
3 | 968 | 19.4% |
|
4 | 669 | 13.4% |
|
5 | 1135 | 22.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1048 | 21.0% |
|
2 | 1180 | 23.6% |
|
3 | 968 | 19.4% |
|
4 | 669 | 13.4% |
|
5 | 1135 | 22.7% |
|
employ
Numeric
Distinct count | 52 |
---|---|
Unique (%) | 1.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 9.7304 |
---|---|
Minimum | 0 |
Maximum | 52 |
Zeros (%) | 13.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 2 |
Median | 7 |
Q3 | 15 |
95-th percentile | 31 |
Maximum | 52 |
Range | 52 |
Interquartile range | 13 |
Descriptive statistics
Standard deviation | 9.6909 |
---|---|
Coef of variation | 0.99594 |
Kurtosis | 1.0529 |
Mean | 9.7304 |
MAD | 7.6646 |
Skewness | 1.2519 |
Sum | 48652 |
Variance | 93.914 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 659 | 13.2% |
|
1 | 389 | 7.8% |
|
2 | 318 | 6.4% |
|
3 | 309 | 6.2% |
|
4 | 293 | 5.9% |
|
5 | 260 | 5.2% |
|
6 | 250 | 5.0% |
|
7 | 191 | 3.8% |
|
8 | 187 | 3.7% |
|
11 | 184 | 3.7% |
|
Other values (42) | 1960 | 39.2% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 659 | 13.2% |
|
1 | 389 | 7.8% |
|
2 | 318 | 6.4% |
|
3 | 309 | 6.2% |
|
4 | 293 | 5.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
47 | 1 | 0.0% |
|
48 | 1 | 0.0% |
|
49 | 1 | 0.0% |
|
51 | 1 | 0.0% |
|
52 | 1 | 0.0% |
|
equip
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.3408 |
---|
0 |
3296
|
---|---|
1 |
1704
|
Value | Count | Frequency (%) | |
0 | 3296 | 65.9% |
|
1 | 1704 | 34.1% |
|
equipmon
Highly correlated
This variable is highly correlated with equip
and should be ignored for analysis
Correlation | 0.94051 |
---|
equipten
Numeric
Distinct count | 1683 |
---|---|
Unique (%) | 33.7% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 470.18 |
---|---|
Minimum | 0 |
Maximum | 6525.3 |
Zeros (%) | 65.9% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 510.16 |
95-th percentile | 2601 |
Maximum | 6525.3 |
Range | 6525.3 |
Interquartile range | 510.16 |
Descriptive statistics
Standard deviation | 912.22 |
---|---|
Coef of variation | 1.9402 |
Kurtosis | 4.7863 |
Mean | 470.18 |
MAD | 664.55 |
Skewness | 2.2149 |
Sum | 2350900 |
Variance | 832150 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 3296 | 65.9% |
|
1259.35 | 2 | 0.0% |
|
446.45 | 2 | 0.0% |
|
1918.8 | 2 | 0.0% |
|
824.3 | 2 | 0.0% |
|
2778.3 | 2 | 0.0% |
|
723.2 | 2 | 0.0% |
|
163.4 | 2 | 0.0% |
|
206.7 | 2 | 0.0% |
|
224.7 | 2 | 0.0% |
|
Other values (1673) | 1686 | 33.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 3296 | 65.9% |
|
12.05 | 1 | 0.0% |
|
14.65 | 1 | 0.0% |
|
14.85 | 1 | 0.0% |
|
16.1 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
5174.45 | 1 | 0.0% |
|
5996.85 | 1 | 0.0% |
|
6014.7 | 1 | 0.0% |
|
6158.95 | 1 | 0.0% |
|
6525.3 | 1 | 0.0% |
|
forward
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4806 |
---|
0 |
2597
|
---|---|
1 |
2403
|
Value | Count | Frequency (%) | |
0 | 2597 | 51.9% |
|
1 | 2403 | 48.1% |
|
gender
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.5036 |
---|
1 |
2518
|
---|---|
0 |
2482
|
Value | Count | Frequency (%) | |
1 | 2518 | 50.4% |
|
0 | 2482 | 49.6% |
|
homeown
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.6296 |
---|
1 |
3148
|
---|---|
0 |
1852
|
Value | Count | Frequency (%) | |
1 | 3148 | 63.0% |
|
0 | 1852 | 37.0% |
|
hometype
Numeric
Distinct count | 4 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.8426 |
---|---|
Minimum | 1 |
Maximum | 4 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 2 |
Q3 | 2 |
95-th percentile | 4 |
Maximum | 4 |
Range | 3 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.91673 |
---|---|
Coef of variation | 0.49752 |
Kurtosis | -0.43415 |
Mean | 1.8426 |
MAD | 0.7634 |
Skewness | 0.76947 |
Sum | 9213 |
Variance | 0.84039 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1 | 2265 | 45.3% |
|
2 | 1548 | 31.0% |
|
3 | 896 | 17.9% |
|
4 | 291 | 5.8% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 2265 | 45.3% |
|
2 | 1548 | 31.0% |
|
3 | 896 | 17.9% |
|
4 | 291 | 5.8% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 2265 | 45.3% |
|
2 | 1548 | 31.0% |
|
3 | 896 | 17.9% |
|
4 | 291 | 5.8% |
|
hourstv
Numeric
Distinct count | 32 |
---|---|
Unique (%) | 0.6% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 19.645 |
---|---|
Minimum | 0 |
Maximum | 36 |
Zeros (%) | 1.7% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 12 |
Q1 | 17 |
Median | 20 |
Q3 | 23 |
95-th percentile | 28 |
Maximum | 36 |
Range | 36 |
Interquartile range | 6 |
Descriptive statistics
Standard deviation | 5.1656 |
---|---|
Coef of variation | 0.26295 |
Kurtosis | 2.3484 |
Mean | 19.645 |
MAD | 3.8622 |
Skewness | -0.64471 |
Sum | 98225 |
Variance | 26.684 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
20 | 451 | 9.0% |
|
19 | 445 | 8.9% |
|
21 | 440 | 8.8% |
|
18 | 413 | 8.3% |
|
22 | 371 | 7.4% |
|
17 | 350 | 7.0% |
|
16 | 309 | 6.2% |
|
23 | 301 | 6.0% |
|
15 | 263 | 5.3% |
|
24 | 248 | 5.0% |
|
Other values (22) | 1409 | 28.2% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 85 | 1.7% |
|
6 | 1 | 0.0% |
|
7 | 3 | 0.1% |
|
8 | 9 | 0.2% |
|
9 | 13 | 0.3% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
32 | 10 | 0.2% |
|
33 | 8 | 0.2% |
|
34 | 9 | 0.2% |
|
35 | 6 | 0.1% |
|
36 | 3 | 0.1% |
|
inccat
Highly correlated
This variable is highly correlated with lninc
and should be ignored for analysis
Correlation | 0.94879 |
---|
income
Numeric
Distinct count | 266 |
---|---|
Unique (%) | 5.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 54.76 |
---|---|
Minimum | 9 |
Maximum | 1073 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 9 |
---|---|
5-th percentile | 13 |
Q1 | 24 |
Median | 38 |
Q3 | 67 |
95-th percentile | 147 |
Maximum | 1073 |
Range | 1064 |
Interquartile range | 43 |
Descriptive statistics
Standard deviation | 55.378 |
---|---|
Coef of variation | 1.0113 |
Kurtosis | 57.077 |
Mean | 54.76 |
MAD | 34.063 |
Skewness | 5.1792 |
Sum | 273798 |
Variance | 3066.7 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
22 | 112 | 2.2% |
|
29 | 109 | 2.2% |
|
25 | 108 | 2.2% |
|
20 | 102 | 2.0% |
|
30 | 102 | 2.0% |
|
18 | 100 | 2.0% |
|
23 | 100 | 2.0% |
|
24 | 99 | 2.0% |
|
32 | 93 | 1.9% |
|
16 | 91 | 1.8% |
|
Other values (256) | 3984 | 79.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
9 | 83 | 1.7% |
|
10 | 55 | 1.1% |
|
11 | 57 | 1.1% |
|
12 | 52 | 1.0% |
|
13 | 56 | 1.1% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
575 | 1 | 0.0% |
|
642 | 1 | 0.0% |
|
780 | 1 | 0.0% |
|
995 | 1 | 0.0% |
|
1073 | 1 | 0.0% |
|
internet
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.1996 |
---|---|
Minimum | 0 |
Maximum | 4 |
Zeros (%) | 50.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 1 |
Q3 | 2 |
95-th percentile | 4 |
Maximum | 4 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.4493 |
---|---|
Coef of variation | 1.2082 |
Kurtosis | -0.83856 |
Mean | 1.1996 |
MAD | 1.2604 |
Skewness | 0.80841 |
Sum | 5998 |
Variance | 2.1006 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 2498 | 50.0% |
|
1 | 774 | 15.5% |
|
3 | 598 | 12.0% |
|
4 | 585 | 11.7% |
|
2 | 545 | 10.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 2498 | 50.0% |
|
1 | 774 | 15.5% |
|
2 | 545 | 10.9% |
|
3 | 598 | 12.0% |
|
4 | 585 | 11.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
0 | 2498 | 50.0% |
|
1 | 774 | 15.5% |
|
2 | 545 | 10.9% |
|
3 | 598 | 12.0% |
|
4 | 585 | 11.7% |
|
jobcat
Numeric
Distinct count | 6 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.7528 |
---|---|
Minimum | 1 |
Maximum | 6 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 2 |
Q3 | 4 |
95-th percentile | 6 |
Maximum | 6 |
Range | 5 |
Interquartile range | 3 |
Descriptive statistics
Standard deviation | 1.7379 |
---|---|
Coef of variation | 0.63132 |
Kurtosis | -0.75877 |
Mean | 2.7528 |
MAD | 1.467 |
Skewness | 0.79807 |
Sum | 13764 |
Variance | 3.0203 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
2 | 1640 | 32.8% |
|
1 | 1388 | 27.8% |
|
6 | 688 | 13.8% |
|
3 | 620 | 12.4% |
|
5 | 452 | 9.0% |
|
4 | 212 | 4.2% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1388 | 27.8% |
|
2 | 1640 | 32.8% |
|
3 | 620 | 12.4% |
|
4 | 212 | 4.2% |
|
5 | 452 | 9.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
2 | 1640 | 32.8% |
|
3 | 620 | 12.4% |
|
4 | 212 | 4.2% |
|
5 | 452 | 9.0% |
|
6 | 688 | 13.8% |
|
jobsat
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.9642 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.3795 |
---|---|
Coef of variation | 0.46537 |
Kurtosis | -1.2367 |
Mean | 2.9642 |
MAD | 1.1637 |
Skewness | 0.02675 |
Sum | 14821 |
Variance | 1.9029 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3 | 1085 | 21.7% |
|
2 | 1031 | 20.6% |
|
4 | 1016 | 20.3% |
|
1 | 975 | 19.5% |
|
5 | 893 | 17.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 975 | 19.5% |
|
2 | 1031 | 20.6% |
|
3 | 1085 | 21.7% |
|
4 | 1016 | 20.3% |
|
5 | 893 | 17.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 975 | 19.5% |
|
2 | 1031 | 20.6% |
|
3 | 1085 | 21.7% |
|
4 | 1016 | 20.3% |
|
5 | 893 | 17.9% |
|
lncardmon
Highly correlated
This variable is highly correlated with cardmon
and should be ignored for analysis
Correlation | 0.91687 |
---|
lncardten
Numeric
Distinct count | 697 |
---|---|
Unique (%) | 13.9% |
Missing (%) | 28.4% |
Missing (n) | 1422 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 6.4263 |
---|---|
Minimum | 1.5581 |
Maximum | 9.5255 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1.5581 |
---|---|
5-th percentile | 4.0943 |
Q1 | 5.8579 |
Median | 6.6399 |
Q3 | 7.2189 |
95-th percentile | 7.9233 |
Maximum | 9.5255 |
Range | 7.9674 |
Interquartile range | 1.361 |
Descriptive statistics
Standard deviation | 1.172 |
---|---|
Coef of variation | 0.18238 |
Kurtosis | 2.0266 |
Mean | 6.4263 |
MAD | 0.88427 |
Skewness | -1.1714 |
Sum | 22993 |
Variance | 1.3737 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
6.380122536899765 | 21 | 0.4% |
|
5.940171252720432 | 20 | 0.4% |
|
5.298317366548036 | 20 | 0.4% |
|
3.8066624897703196 | 19 | 0.4% |
|
5.272999558563747 | 19 | 0.4% |
|
6.214608098422191 | 19 | 0.4% |
|
6.075346031088684 | 18 | 0.4% |
|
5.68697535633982 | 18 | 0.4% |
|
5.799092654460526 | 18 | 0.4% |
|
5.393627546352362 | 18 | 0.4% |
|
Other values (686) | 3388 | 67.8% |
|
(Missing) | 1422 | 28.4% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1.55814461804655 | 1 | 0.0% |
|
1.6094379124341003 | 17 | 0.3% |
|
1.6582280766035324 | 1 | 0.0% |
|
2.0476928433652555 | 1 | 0.0% |
|
2.0794415416798357 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
8.770283819098399 | 1 | 0.0% |
|
8.869960510523953 | 1 | 0.0% |
|
8.896998552743824 | 1 | 0.0% |
|
9.20230820027892 | 1 | 0.0% |
|
9.525516008736886 | 1 | 0.0% |
|
lncreddebt
Numeric
Distinct count | 4942 |
---|---|
Unique (%) | 98.8% |
Missing (%) | 0.0% |
Missing (n) | 1 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | -0.13045 |
---|---|
Minimum | -6.5973 |
Maximum | 4.692 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -6.5973 |
---|---|
5-th percentile | -2.2916 |
Q1 | -0.95269 |
Median | -0.076106 |
Q3 | 0.72467 |
95-th percentile | 1.8523 |
Maximum | 4.692 |
Range | 11.289 |
Interquartile range | 1.6774 |
Descriptive statistics
Standard deviation | 1.2731 |
---|---|
Coef of variation | -9.7587 |
Kurtosis | 0.49262 |
Mean | -0.13045 |
MAD | 0.99857 |
Skewness | -0.29509 |
Sum | -652.14 |
Variance | 1.6207 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.22633844221072896 | 2 | 0.0% |
|
-0.5028574541168138 | 2 | 0.0% |
|
-0.981229333033066 | 2 | 0.0% |
|
-1.7618900356257774 | 2 | 0.0% |
|
-0.12284582999884598 | 2 | 0.0% |
|
-0.9930637906800688 | 2 | 0.0% |
|
-1.151987749259985 | 2 | 0.0% |
|
0.01744691360372049 | 2 | 0.0% |
|
-2.041929980602883 | 2 | 0.0% |
|
-1.4444659939752587 | 2 | 0.0% |
|
Other values (4931) | 4979 | 99.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-6.597333719560867 | 1 | 0.0% |
|
-5.681042987686712 | 1 | 0.0% |
|
-5.310389947782306 | 1 | 0.0% |
|
-5.06024579464959 | 1 | 0.0% |
|
-4.944145552827423 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
3.74001211059991 | 1 | 0.0% |
|
3.789755027436595 | 1 | 0.0% |
|
3.8857719210512522 | 1 | 0.0% |
|
4.211992033134444 | 1 | 0.0% |
|
4.692013678885976 | 1 | 0.0% |
|
lnequipmon
Highly correlated
This variable is highly correlated with equipmon
and should be ignored for analysis
Correlation | 0.97931 |
---|
lnequipten
Highly correlated
This variable is highly correlated with lntollten
and should be ignored for analysis
Correlation | 0.96611 |
---|
lninc
Numeric
Distinct count | 266 |
---|---|
Unique (%) | 5.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.6999 |
---|---|
Minimum | 2.1972 |
Maximum | 6.9782 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 2.1972 |
---|---|
5-th percentile | 2.5649 |
Q1 | 3.1781 |
Median | 3.6376 |
Q3 | 4.2047 |
95-th percentile | 4.9904 |
Maximum | 6.9782 |
Range | 4.781 |
Interquartile range | 1.0266 |
Descriptive statistics
Standard deviation | 0.74707 |
---|---|
Coef of variation | 0.20192 |
Kurtosis | -0.023788 |
Mean | 3.6999 |
MAD | 0.60138 |
Skewness | 0.35028 |
Sum | 18500 |
Variance | 0.55812 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
3.091042453358316 | 112 | 2.2% |
|
3.367295829986474 | 109 | 2.2% |
|
3.2188758248682006 | 108 | 2.2% |
|
3.4011973816621555 | 102 | 2.0% |
|
2.995732273553991 | 102 | 2.0% |
|
3.1354942159291497 | 100 | 2.0% |
|
2.8903717578961645 | 100 | 2.0% |
|
3.1780538303479458 | 99 | 2.0% |
|
3.4657359027997265 | 93 | 1.9% |
|
3.044522437723423 | 91 | 1.8% |
|
Other values (256) | 3984 | 79.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
2.1972245773362196 | 83 | 1.7% |
|
2.302585092994046 | 55 | 1.1% |
|
2.3978952727983707 | 57 | 1.1% |
|
2.4849066497880004 | 52 | 1.0% |
|
2.5649493574615367 | 56 | 1.1% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
6.354370040797351 | 1 | 0.0% |
|
6.464588303689961 | 1 | 0.0% |
|
6.659293919683638 | 1 | 0.0% |
|
6.902742737158593 | 1 | 0.0% |
|
6.9782137426306985 | 1 | 0.0% |
|
lnlongmon
Numeric
Distinct count | 866 |
---|---|
Unique (%) | 17.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.2888 |
---|---|
Minimum | -0.10536 |
Maximum | 5.1921 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -0.10536 |
---|---|
5-th percentile | 1.0647 |
Q1 | 1.7405 |
Median | 2.2565 |
Q3 | 2.8064 |
95-th percentile | 3.6043 |
Maximum | 5.1921 |
Range | 5.2975 |
Interquartile range | 1.0659 |
Descriptive statistics
Standard deviation | 0.77518 |
---|---|
Coef of variation | 0.33869 |
Kurtosis | -0.08563 |
Mean | 2.2888 |
MAD | 0.62183 |
Skewness | 0.17355 |
Sum | 11444 |
Variance | 0.6009 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1.4350845252893227 | 31 | 0.6% |
|
1.6094379124341003 | 29 | 0.6% |
|
2.066862759472976 | 28 | 0.6% |
|
1.7316555451583497 | 25 | 0.5% |
|
2.0149030205422647 | 25 | 0.5% |
|
1.55814461804655 | 24 | 0.5% |
|
1.3737155789130306 | 24 | 0.5% |
|
1.6389967146756448 | 24 | 0.5% |
|
1.599387576580599 | 24 | 0.5% |
|
1.5151272329628591 | 23 | 0.5% |
|
Other values (856) | 4743 | 94.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-0.10536051565782628 | 2 | 0.0% |
|
-0.05129329438755058 | 1 | 0.0% |
|
0.0 | 1 | 0.0% |
|
0.04879016416943205 | 1 | 0.0% |
|
0.09531017980432493 | 2 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4.7340030578310595 | 1 | 0.0% |
|
4.741011408899505 | 1 | 0.0% |
|
4.867534450455582 | 1 | 0.0% |
|
4.94306997460049 | 1 | 0.0% |
|
5.192123170141633 | 1 | 0.0% |
|
lnlongten
Highly correlated
This variable is highly correlated with lnlongmon
and should be ignored for analysis
Correlation | 0.92171 |
---|
lnothdebt
Numeric
Distinct count | 4973 |
---|---|
Unique (%) | 99.5% |
Missing (%) | 0.0% |
Missing (n) | 1 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.69692 |
---|---|
Minimum | -4.0921 |
Maximum | 4.952 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | -4.0921 |
---|---|
5-th percentile | -1.2435 |
Q1 | -0.018987 |
Median | 0.74154 |
Q3 | 1.4621 |
95-th percentile | 2.4696 |
Maximum | 4.952 |
Range | 9.0441 |
Interquartile range | 1.481 |
Descriptive statistics
Standard deviation | 1.1286 |
---|---|
Coef of variation | 1.6194 |
Kurtosis | 0.30242 |
Mean | 0.69692 |
MAD | 0.89086 |
Skewness | -0.23209 |
Sum | 3483.9 |
Variance | 1.2737 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.10690811750408048 | 3 | 0.1% |
|
0.32082183403905973 | 2 | 0.0% |
|
0.586545895465986 | 2 | 0.0% |
|
-1.70683025844275 | 2 | 0.0% |
|
0.19724957737819138 | 2 | 0.0% |
|
1.0522751134597443 | 2 | 0.0% |
|
-0.1026973982383425 | 2 | 0.0% |
|
-0.8003093181643999 | 2 | 0.0% |
|
0.6127394062576954 | 2 | 0.0% |
|
-1.0346014067076095 | 2 | 0.0% |
|
Other values (4962) | 4978 | 99.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
-4.092107067281908 | 1 | 0.0% |
|
-3.880718155599734 | 1 | 0.0% |
|
-3.8532825104055455 | 1 | 0.0% |
|
-3.7984264988456053 | 1 | 0.0% |
|
-3.3090740927863496 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4.08389414634726 | 1 | 0.0% |
|
4.092846640897909 | 1 | 0.0% |
|
4.203615920555767 | 1 | 0.0% |
|
4.627991820121045 | 1 | 0.0% |
|
4.952010982824063 | 1 | 0.0% |
|
lntollmon
Highly correlated
This variable is highly correlated with tollmon
and should be ignored for analysis
Correlation | 0.93783 |
---|
lntollten
Highly correlated
This variable is highly correlated with lnlongten
and should be ignored for analysis
Correlation | 0.93139 |
---|
lnwiremon
Highly correlated
This variable is highly correlated with wiremon
and should be ignored for analysis
Correlation | 0.95389 |
---|
lnwireten
Highly correlated
This variable is highly correlated with lnequipten
and should be ignored for analysis
Correlation | 0.98318 |
---|
longmon
Numeric
Distinct count | 866 |
---|---|
Unique (%) | 17.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 13.471 |
---|---|
Minimum | 0.9 |
Maximum | 179.85 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0.9 |
---|---|
5-th percentile | 2.9 |
Q1 | 5.7 |
Median | 9.55 |
Q3 | 16.55 |
95-th percentile | 36.758 |
Maximum | 179.85 |
Range | 178.95 |
Interquartile range | 10.85 |
Descriptive statistics
Standard deviation | 12.773 |
---|---|
Coef of variation | 0.94818 |
Kurtosis | 18.503 |
Mean | 13.471 |
MAD | 8.4049 |
Skewness | 3.2719 |
Sum | 67357 |
Variance | 163.16 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4.2 | 31 | 0.6% |
|
5.0 | 29 | 0.6% |
|
7.9 | 28 | 0.6% |
|
5.65 | 25 | 0.5% |
|
7.5 | 25 | 0.5% |
|
5.15 | 24 | 0.5% |
|
4.75 | 24 | 0.5% |
|
3.95 | 24 | 0.5% |
|
4.95 | 24 | 0.5% |
|
4.3 | 23 | 0.5% |
|
Other values (856) | 4743 | 94.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.9 | 2 | 0.0% |
|
0.95 | 1 | 0.0% |
|
1.0 | 1 | 0.0% |
|
1.05 | 1 | 0.0% |
|
1.1 | 2 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
113.75 | 1 | 0.0% |
|
114.55 | 1 | 0.0% |
|
130.0 | 1 | 0.0% |
|
140.2 | 1 | 0.0% |
|
179.85 | 1 | 0.0% |
|
longten
Highly correlated
This variable is highly correlated with longmon
and should be ignored for analysis
Correlation | 0.9857 |
---|
marital
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4802 |
---|
0 |
2599
|
---|---|
1 |
2401
|
Value | Count | Frequency (%) | |
0 | 2599 | 52.0% |
|
1 | 2401 | 48.0% |
|
multline
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4884 |
---|
0 |
2558
|
---|---|
1 |
2442
|
Value | Count | Frequency (%) | |
0 | 2558 | 51.2% |
|
1 | 2442 | 48.8% |
|
news
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4726 |
---|
0 |
2637
|
---|---|
1 |
2363
|
Value | Count | Frequency (%) | |
0 | 2637 | 52.7% |
|
1 | 2363 | 47.3% |
|
othdebt
Numeric
Distinct count | 4973 |
---|---|
Unique (%) | 99.5% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.6545 |
---|---|
Minimum | 0 |
Maximum | 141.46 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0.28769 |
Q1 | 0.9803 |
Median | 2.0985 |
Q3 | 4.3148 |
95-th percentile | 11.816 |
Maximum | 141.46 |
Range | 141.46 |
Interquartile range | 3.3345 |
Descriptive statistics
Standard deviation | 5.3952 |
---|---|
Coef of variation | 1.4763 |
Kurtosis | 125.15 |
Mean | 3.6545 |
MAD | 2.972 |
Skewness | 7.5899 |
Sum | 18272 |
Variance | 29.108 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1.112832 | 3 | 0.1% |
|
4.6926000000000005 | 2 | 0.0% |
|
4.45536 | 2 | 0.0% |
|
1.218048 | 2 | 0.0% |
|
1.131624 | 2 | 0.0% |
|
0.18144 | 2 | 0.0% |
|
0.531696 | 2 | 0.0% |
|
2.86416 | 2 | 0.0% |
|
0.355368 | 2 | 0.0% |
|
2.2800960000000003 | 2 | 0.0% |
|
Other values (4963) | 4979 | 99.6% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 1 | 0.0% |
|
0.016704 | 1 | 0.0% |
|
0.020636 | 1 | 0.0% |
|
0.02121000000000001 | 1 | 0.0% |
|
0.022406 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
59.376239999999996 | 1 | 0.0% |
|
59.910192 | 1 | 0.0% |
|
66.9279 | 1 | 0.0% |
|
102.308404 | 1 | 0.0% |
|
141.45915 | 1 | 0.0% |
|
owncd
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.9328 |
---|
1 |
4664
|
---|---|
0 |
|
Value | Count | Frequency (%) | |
1 | 4664 | 93.3% |
|
0 | 336 | 6.7% |
|
owndvd
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.9136 |
---|
1 |
4568
|
---|---|
0 |
|
Value | Count | Frequency (%) | |
1 | 4568 | 91.4% |
|
0 | 432 | 8.6% |
|
ownfax
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1788 |
---|
0 |
4106
|
---|---|
1 |
894
|
Value | Count | Frequency (%) | |
0 | 4106 | 82.1% |
|
1 | 894 | 17.9% |
|
owngame
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4748 |
---|
0 |
2626
|
---|---|
1 |
2374
|
Value | Count | Frequency (%) | |
0 | 2626 | 52.5% |
|
1 | 2374 | 47.5% |
|
ownipod
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4792 |
---|
0 |
2604
|
---|---|
1 |
2396
|
Value | Count | Frequency (%) | |
0 | 2604 | 52.1% |
|
1 | 2396 | 47.9% |
|
ownpc
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.6328 |
---|
1 |
3164
|
---|---|
0 |
1836
|
Value | Count | Frequency (%) | |
1 | 3164 | 63.3% |
|
0 | 1836 | 36.7% |
|
ownpda
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.201 |
---|
0 |
3995
|
---|---|
1 |
1005
|
Value | Count | Frequency (%) | |
0 | 3995 | 79.9% |
|
1 | 1005 | 20.1% |
|
owntv
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.983 |
---|
1 |
4915
|
---|---|
0 |
|
Value | Count | Frequency (%) | |
1 | 4915 | 98.3% |
|
0 | 85 | 1.7% |
|
ownvcr
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.9156 |
---|
1 |
4578
|
---|---|
0 |
|
Value | Count | Frequency (%) | |
1 | 4578 | 91.6% |
|
0 | 422 | 8.4% |
|
pager
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2436 |
---|
0 |
3782
|
---|---|
1 |
1218
|
Value | Count | Frequency (%) | |
0 | 3782 | 75.6% |
|
1 | 1218 | 24.4% |
|
pets
Numeric
Distinct count | 20 |
---|---|
Unique (%) | 0.4% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.0674 |
---|---|
Minimum | 0 |
Maximum | 21 |
Zeros (%) | 30.6% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 2 |
Q3 | 5 |
95-th percentile | 10 |
Maximum | 21 |
Range | 21 |
Interquartile range | 5 |
Descriptive statistics
Standard deviation | 3.4145 |
---|---|
Coef of variation | 1.1132 |
Kurtosis | 0.89599 |
Mean | 3.0674 |
MAD | 2.7814 |
Skewness | 1.1938 |
Sum | 15337 |
Variance | 11.659 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 1529 | 30.6% |
|
1 | 780 | 15.6% |
|
2 | 586 | 11.7% |
|
3 | 376 | 7.5% |
|
5 | 298 | 6.0% |
|
4 | 284 | 5.7% |
|
6 | 256 | 5.1% |
|
7 | 246 | 4.9% |
|
8 | 178 | 3.6% |
|
9 | 170 | 3.4% |
|
Other values (10) | 297 | 5.9% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 1529 | 30.6% |
|
1 | 780 | 15.6% |
|
2 | 586 | 11.7% |
|
3 | 376 | 7.5% |
|
4 | 284 | 5.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
15 | 10 | 0.2% |
|
16 | 9 | 0.2% |
|
18 | 1 | 0.0% |
|
19 | 3 | 0.1% |
|
21 | 1 | 0.0% |
|
pets_birds
Numeric
Distinct count | 6 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.1104 |
---|---|
Minimum | 0 |
Maximum | 5 |
Zeros (%) | 94.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 1 |
Maximum | 5 |
Range | 5 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.49423 |
---|---|
Coef of variation | 4.4767 |
Kurtosis | 29.823 |
Mean | 0.1104 |
MAD | 0.20746 |
Skewness | 5.2464 |
Sum | 552 |
Variance | 0.24426 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 4698 | 94.0% |
|
1 | 144 | 2.9% |
|
2 | 88 | 1.8% |
|
3 | 49 | 1.0% |
|
4 | 20 | 0.4% |
|
5 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 4698 | 94.0% |
|
1 | 144 | 2.9% |
|
2 | 88 | 1.8% |
|
3 | 49 | 1.0% |
|
4 | 20 | 0.4% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 144 | 2.9% |
|
2 | 88 | 1.8% |
|
3 | 49 | 1.0% |
|
4 | 20 | 0.4% |
|
5 | 1 | 0.0% |
|
pets_cats
Numeric
Distinct count | 7 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.5004 |
---|---|
Minimum | 0 |
Maximum | 6 |
Zeros (%) | 68.3% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 1 |
95-th percentile | 2 |
Maximum | 6 |
Range | 6 |
Interquartile range | 1 |
Descriptive statistics
Standard deviation | 0.86078 |
---|---|
Coef of variation | 1.7202 |
Kurtosis | 3.5056 |
Mean | 0.5004 |
MAD | 0.68315 |
Skewness | 1.8804 |
Sum | 2502 |
Variance | 0.74095 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 3413 | 68.3% |
|
1 | 923 | 18.5% |
|
2 | 463 | 9.3% |
|
3 | 160 | 3.2% |
|
4 | 34 | 0.7% |
|
5 | 5 | 0.1% |
|
6 | 2 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 3413 | 68.3% |
|
1 | 923 | 18.5% |
|
2 | 463 | 9.3% |
|
3 | 160 | 3.2% |
|
4 | 34 | 0.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
2 | 463 | 9.3% |
|
3 | 160 | 3.2% |
|
4 | 34 | 0.7% |
|
5 | 5 | 0.1% |
|
6 | 2 | 0.0% |
|
pets_dogs
Numeric
Distinct count | 7 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.3924 |
---|---|
Minimum | 0 |
Maximum | 7 |
Zeros (%) | 75.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 2 |
Maximum | 7 |
Range | 7 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.79608 |
---|---|
Coef of variation | 2.0288 |
Kurtosis | 5.8784 |
Mean | 0.3924 |
MAD | 0.59048 |
Skewness | 2.3301 |
Sum | 1962 |
Variance | 0.63375 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 3762 | 75.2% |
|
1 | 720 | 14.4% |
|
2 | 360 | 7.2% |
|
3 | 117 | 2.3% |
|
4 | 36 | 0.7% |
|
5 | 4 | 0.1% |
|
7 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 3762 | 75.2% |
|
1 | 720 | 14.4% |
|
2 | 360 | 7.2% |
|
3 | 117 | 2.3% |
|
4 | 36 | 0.7% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
2 | 360 | 7.2% |
|
3 | 117 | 2.3% |
|
4 | 36 | 0.7% |
|
5 | 4 | 0.1% |
|
7 | 1 | 0.0% |
|
pets_freshfish
Numeric
Distinct count | 17 |
---|---|
Unique (%) | 0.3% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 1.8474 |
---|---|
Minimum | 0 |
Maximum | 16 |
Zeros (%) | 69.2% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 4 |
95-th percentile | 8 |
Maximum | 16 |
Range | 16 |
Interquartile range | 4 |
Descriptive statistics
Standard deviation | 3.0748 |
---|---|
Coef of variation | 1.6644 |
Kurtosis | 1.0344 |
Mean | 1.8474 |
MAD | 2.564 |
Skewness | 1.4552 |
Sum | 9237 |
Variance | 9.4544 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 3462 | 69.2% |
|
5 | 261 | 5.2% |
|
6 | 251 | 5.0% |
|
7 | 229 | 4.6% |
|
4 | 222 | 4.4% |
|
8 | 134 | 2.7% |
|
3 | 130 | 2.6% |
|
9 | 110 | 2.2% |
|
2 | 63 | 1.3% |
|
10 | 54 | 1.1% |
|
Other values (7) | 84 | 1.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 3462 | 69.2% |
|
1 | 17 | 0.3% |
|
2 | 63 | 1.3% |
|
3 | 130 | 2.6% |
|
4 | 222 | 4.4% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
12 | 18 | 0.4% |
|
13 | 4 | 0.1% |
|
14 | 4 | 0.1% |
|
15 | 5 | 0.1% |
|
16 | 1 | 0.0% |
|
pets_reptiles
Numeric
Distinct count | 7 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.0556 |
---|---|
Minimum | 0 |
Maximum | 6 |
Zeros (%) | 96.4% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 0 |
Maximum | 6 |
Range | 6 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.32578 |
---|---|
Coef of variation | 5.8593 |
Kurtosis | 72.829 |
Mean | 0.0556 |
MAD | 0.10715 |
Skewness | 7.5861 |
Sum | 278 |
Variance | 0.10613 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 4818 | 96.4% |
|
1 | 114 | 2.3% |
|
2 | 46 | 0.9% |
|
3 | 19 | 0.4% |
|
6 | 1 | 0.0% |
|
5 | 1 | 0.0% |
|
4 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 4818 | 96.4% |
|
1 | 114 | 2.3% |
|
2 | 46 | 0.9% |
|
3 | 19 | 0.4% |
|
4 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
2 | 46 | 0.9% |
|
3 | 19 | 0.4% |
|
4 | 1 | 0.0% |
|
5 | 1 | 0.0% |
|
6 | 1 | 0.0% |
|
pets_saltfish
Numeric
Distinct count | 9 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.0466 |
---|---|
Minimum | 0 |
Maximum | 8 |
Zeros (%) | 98.8% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 0 |
Maximum | 8 |
Range | 8 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.46955 |
---|---|
Coef of variation | 10.076 |
Kurtosis | 133.34 |
Mean | 0.0466 |
MAD | 0.092119 |
Skewness | 11.194 |
Sum | 233 |
Variance | 0.22047 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 4942 | 98.8% |
|
3 | 11 | 0.2% |
|
6 | 11 | 0.2% |
|
2 | 11 | 0.2% |
|
4 | 10 | 0.2% |
|
5 | 8 | 0.2% |
|
7 | 3 | 0.1% |
|
1 | 3 | 0.1% |
|
8 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 4942 | 98.8% |
|
1 | 3 | 0.1% |
|
2 | 11 | 0.2% |
|
3 | 11 | 0.2% |
|
4 | 10 | 0.2% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
4 | 10 | 0.2% |
|
5 | 8 | 0.2% |
|
6 | 11 | 0.2% |
|
7 | 3 | 0.1% |
|
8 | 1 | 0.0% |
|
pets_small
Numeric
Distinct count | 8 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 0.1146 |
---|---|
Minimum | 0 |
Maximum | 7 |
Zeros (%) | 95.0% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 0 |
95-th percentile | 1 |
Maximum | 7 |
Range | 7 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 0.5688 |
---|---|
Coef of variation | 4.9633 |
Kurtosis | 39.154 |
Mean | 0.1146 |
MAD | 0.21769 |
Skewness | 5.9087 |
Sum | 573 |
Variance | 0.32353 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0 | 4749 | 95.0% |
|
2 | 83 | 1.7% |
|
1 | 78 | 1.6% |
|
3 | 50 | 1.0% |
|
4 | 26 | 0.5% |
|
5 | 10 | 0.2% |
|
6 | 3 | 0.1% |
|
7 | 1 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0 | 4749 | 95.0% |
|
1 | 78 | 1.6% |
|
2 | 83 | 1.7% |
|
3 | 50 | 1.0% |
|
4 | 26 | 0.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
3 | 50 | 1.0% |
|
4 | 26 | 0.5% |
|
5 | 10 | 0.2% |
|
6 | 3 | 0.1% |
|
7 | 1 | 0.0% |
|
polcontrib
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2384 |
---|
0 |
3808
|
---|---|
1 |
1192
|
Value | Count | Frequency (%) | |
0 | 3808 | 76.2% |
|
1 | 1192 | 23.8% |
|
polparty
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.3814 |
---|
0 |
3093
|
---|---|
1 |
1907
|
Value | Count | Frequency (%) | |
0 | 3093 | 61.9% |
|
1 | 1907 | 38.1% |
|
polview
Numeric
Distinct count | 7 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 4.0886 |
---|---|
Minimum | 1 |
Maximum | 7 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 2 |
Q1 | 3 |
Median | 4 |
Q3 | 5 |
95-th percentile | 6 |
Maximum | 7 |
Range | 6 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.3871 |
---|---|
Coef of variation | 0.33925 |
Kurtosis | -0.5312 |
Mean | 4.0886 |
MAD | 1.0702 |
Skewness | -0.19834 |
Sum | 20443 |
Variance | 1.9239 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
4 | 1733 | 34.7% |
|
5 | 893 | 17.9% |
|
6 | 843 | 16.9% |
|
3 | 659 | 13.2% |
|
2 | 623 | 12.5% |
|
1 | 163 | 3.3% |
|
7 | 86 | 1.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 163 | 3.3% |
|
2 | 623 | 12.5% |
|
3 | 659 | 13.2% |
|
4 | 1733 | 34.7% |
|
5 | 893 | 17.9% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
3 | 659 | 13.2% |
|
4 | 1733 | 34.7% |
|
5 | 893 | 17.9% |
|
6 | 843 | 16.9% |
|
7 | 86 | 1.7% |
|
reason
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 7.6368 |
---|---|
Minimum | 1 |
Maximum | 9 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 9 |
Median | 9 |
Q3 | 9 |
95-th percentile | 9 |
Maximum | 9 |
Range | 8 |
Interquartile range | 0 |
Descriptive statistics
Standard deviation | 2.85 |
---|---|
Coef of variation | 0.37319 |
Kurtosis | 0.84805 |
Mean | 7.6368 |
MAD | 2.2095 |
Skewness | -1.6586 |
Sum | 38184 |
Variance | 8.1225 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
9 | 4052 | 81.0% |
|
1 | 447 | 8.9% |
|
2 | 339 | 6.8% |
|
4 | 105 | 2.1% |
|
3 | 57 | 1.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 447 | 8.9% |
|
2 | 339 | 6.8% |
|
3 | 57 | 1.1% |
|
4 | 105 | 2.1% |
|
9 | 4052 | 81.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 447 | 8.9% |
|
2 | 339 | 6.8% |
|
3 | 57 | 1.1% |
|
4 | 105 | 2.1% |
|
9 | 4052 | 81.0% |
|
region
Numeric
Distinct count | 5 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 3.0014 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 2 |
Median | 3 |
Q3 | 4 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.4218 |
---|---|
Coef of variation | 0.4737 |
Kurtosis | -1.309 |
Mean | 3.0014 |
MAD | 1.2069 |
Skewness | 0.0050525 |
Sum | 15007 |
Variance | 2.0214 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
5 | 1027 | 20.5% |
|
1 | 1009 | 20.2% |
|
3 | 1003 | 20.1% |
|
2 | 995 | 19.9% |
|
4 | 966 | 19.3% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 1009 | 20.2% |
|
2 | 995 | 19.9% |
|
3 | 1003 | 20.1% |
|
4 | 966 | 19.3% |
|
5 | 1027 | 20.5% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1 | 1009 | 20.2% |
|
2 | 995 | 19.9% |
|
3 | 1003 | 20.1% |
|
4 | 966 | 19.3% |
|
5 | 1027 | 20.5% |
|
reside
Numeric
Distinct count | 9 |
---|---|
Unique (%) | 0.2% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.204 |
---|---|
Minimum | 1 |
Maximum | 9 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 2 |
Q3 | 3 |
95-th percentile | 5 |
Maximum | 9 |
Range | 8 |
Interquartile range | 2 |
Descriptive statistics
Standard deviation | 1.394 |
---|---|
Coef of variation | 0.63248 |
Kurtosis | 1.0075 |
Mean | 2.204 |
MAD | 1.0998 |
Skewness | 1.2288 |
Sum | 11020 |
Variance | 1.9432 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1 | 2035 | 40.7% |
|
2 | 1467 | 29.3% |
|
3 | 552 | 11.0% |
|
4 | 521 | 10.4% |
|
5 | 288 | 5.8% |
|
6 | 99 | 2.0% |
|
7 | 29 | 0.6% |
|
8 | 7 | 0.1% |
|
9 | 2 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1 | 2035 | 40.7% |
|
2 | 1467 | 29.3% |
|
3 | 552 | 11.0% |
|
4 | 521 | 10.4% |
|
5 | 288 | 5.8% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
5 | 288 | 5.8% |
|
6 | 99 | 2.0% |
|
7 | 29 | 0.6% |
|
8 | 7 | 0.1% |
|
9 | 2 | 0.0% |
|
response_01
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.0836 |
---|
0 |
4582
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4582 | 91.6% |
|
1 | 418 | 8.4% |
|
response_02
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1298 |
---|
0 |
4351
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4351 | 87.0% |
|
1 | 649 | 13.0% |
|
response_03
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1026 |
---|
0 |
4487
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4487 | 89.7% |
|
1 | 513 | 10.3% |
|
retire
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1476 |
---|
0 |
4262
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4262 | 85.2% |
|
1 | 738 | 14.8% |
|
spoused
Highly correlated
This variable is highly correlated with marital
and should be ignored for analysis
Correlation | 0.95577 |
---|
spousedcat
Highly correlated
This variable is highly correlated with spoused
and should be ignored for analysis
Correlation | 0.98403 |
---|
telecommute
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.188 |
---|
0 |
4060
|
---|---|
1 |
940
|
Value | Count | Frequency (%) | |
0 | 4060 | 81.2% |
|
1 | 940 | 18.8% |
|
tenure
Highly correlated
This variable is highly correlated with card2tenure
and should be ignored for analysis
Correlation | 0.92824 |
---|
tollfree
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.4756 |
---|
0 |
2622
|
---|---|
1 |
2378
|
Value | Count | Frequency (%) | |
0 | 2622 | 52.4% |
|
1 | 2378 | 47.6% |
|
tollmon
Numeric
Distinct count | 235 |
---|---|
Unique (%) | 4.7% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 13.264 |
---|---|
Minimum | 0 |
Maximum | 173 |
Zeros (%) | 52.4% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 24.5 |
95-th percentile | 43.5 |
Maximum | 173 |
Range | 173 |
Interquartile range | 24.5 |
Descriptive statistics
Standard deviation | 16.31 |
---|---|
Coef of variation | 1.2296 |
Kurtosis | 2.3195 |
Mean | 13.264 |
MAD | 13.992 |
Skewness | 1.1816 |
Sum | 66322 |
Variance | 266.02 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 2622 | 52.4% |
|
22.75 | 33 | 0.7% |
|
18.0 | 33 | 0.7% |
|
24.0 | 32 | 0.6% |
|
23.0 | 31 | 0.6% |
|
22.0 | 30 | 0.6% |
|
23.75 | 30 | 0.6% |
|
20.0 | 29 | 0.6% |
|
19.0 | 29 | 0.6% |
|
19.5 | 29 | 0.6% |
|
Other values (225) | 2102 | 42.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 2622 | 52.4% |
|
8.0 | 1 | 0.0% |
|
8.5 | 2 | 0.0% |
|
8.75 | 2 | 0.0% |
|
9.0 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
86.0 | 1 | 0.0% |
|
94.75 | 1 | 0.0% |
|
99.25 | 1 | 0.0% |
|
101.75 | 1 | 0.0% |
|
173.0 | 1 | 0.0% |
|
tollten
Numeric
Distinct count | 2323 |
---|---|
Unique (%) | 46.5% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 577.83 |
---|---|
Minimum | 0 |
Maximum | 6923.4 |
Zeros (%) | 52.4% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 885.45 |
95-th percentile | 2620.2 |
Maximum | 6923.4 |
Range | 6923.4 |
Interquartile range | 885.45 |
Descriptive statistics
Standard deviation | 949.15 |
---|---|
Coef of variation | 1.6426 |
Kurtosis | 4.8793 |
Mean | 577.83 |
MAD | 712.02 |
Skewness | 2.0899 |
Sum | 2889200 |
Variance | 900890 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 2622 | 52.4% |
|
16.75 | 3 | 0.1% |
|
1480.5 | 3 | 0.1% |
|
10.0 | 3 | 0.1% |
|
68.65 | 2 | 0.0% |
|
423.2 | 2 | 0.0% |
|
478.95 | 2 | 0.0% |
|
17.75 | 2 | 0.0% |
|
1325.8 | 2 | 0.0% |
|
481.5 | 2 | 0.0% |
|
Other values (2313) | 2357 | 47.1% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 2622 | 52.4% |
|
8.75 | 1 | 0.0% |
|
10.0 | 3 | 0.1% |
|
10.5 | 1 | 0.0% |
|
10.75 | 1 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
5646.45 | 1 | 0.0% |
|
6196.5 | 1 | 0.0% |
|
6763.8 | 1 | 0.0% |
|
6843.0 | 1 | 0.0% |
|
6923.45 | 1 | 0.0% |
|
totalspend
Highly correlated
This variable is highly correlated with cardspent
and should be ignored for analysis
Correlation | 0.94149 |
---|
townsize
Numeric
Distinct count | 6 |
---|---|
Unique (%) | 0.1% |
Missing (%) | 0.0% |
Missing (n) | 2 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 2.6873 |
---|---|
Minimum | 1 |
Maximum | 5 |
Zeros (%) | 0.0% |
Quantile statistics
Minimum | 1 |
---|---|
5-th percentile | 1 |
Q1 | 1 |
Median | 3 |
Q3 | 4 |
95-th percentile | 5 |
Maximum | 5 |
Range | 4 |
Interquartile range | 3 |
Descriptive statistics
Standard deviation | 1.4259 |
---|---|
Coef of variation | 0.53062 |
Kurtosis | -1.2628 |
Mean | 2.6873 |
MAD | 1.2578 |
Skewness | 0.27662 |
Sum | 13431 |
Variance | 2.0333 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
1.0 | 1436 | 28.7% |
|
2.0 | 1048 | 21.0% |
|
3.0 | 907 | 18.1% |
|
4.0 | 857 | 17.1% |
|
5.0 | 750 | 15.0% |
|
(Missing) | 2 | 0.0% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
1.0 | 1436 | 28.7% |
|
2.0 | 1048 | 21.0% |
|
3.0 | 907 | 18.1% |
|
4.0 | 857 | 17.1% |
|
5.0 | 750 | 15.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
1.0 | 1436 | 28.7% |
|
2.0 | 1048 | 21.0% |
|
3.0 | 907 | 18.1% |
|
4.0 | 857 | 17.1% |
|
5.0 | 750 | 15.0% |
|
union
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.1512 |
---|
0 |
4244
|
---|---|
1 |
|
Value | Count | Frequency (%) | |
0 | 4244 | 84.9% |
|
1 | 756 | 15.1% |
|
voice
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.303 |
---|
0 |
3485
|
---|---|
1 |
1515
|
Value | Count | Frequency (%) | |
0 | 3485 | 69.7% |
|
1 | 1515 | 30.3% |
|
vote
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.518 |
---|
1 |
2590
|
---|---|
0 |
2410
|
Value | Count | Frequency (%) | |
1 | 2590 | 51.8% |
|
0 | 2410 | 48.2% |
|
wireless
Boolean
Distinct count | 2 |
---|---|
Unique (%) | 0.0% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Mean | 0.2688 |
---|
0 |
3656
|
---|---|
1 |
1344
|
Value | Count | Frequency (%) | |
0 | 3656 | 73.1% |
|
1 | 1344 | 26.9% |
|
wiremon
Numeric
Distinct count | 746 |
---|---|
Unique (%) | 14.9% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 10.701 |
---|---|
Minimum | 0 |
Maximum | 186.25 |
Zeros (%) | 73.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 20.962 |
95-th percentile | 51.305 |
Maximum | 186.25 |
Range | 186.25 |
Interquartile range | 20.962 |
Descriptive statistics
Standard deviation | 19.8 |
---|---|
Coef of variation | 1.8502 |
Kurtosis | 4.7838 |
Mean | 10.701 |
MAD | 15.649 |
Skewness | 1.998 |
Sum | 53506 |
Variance | 392.03 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 3656 | 73.1% |
|
34.3 | 9 | 0.2% |
|
22.95 | 7 | 0.1% |
|
27.95 | 7 | 0.1% |
|
32.2 | 7 | 0.1% |
|
39.3 | 6 | 0.1% |
|
22.5 | 6 | 0.1% |
|
31.15 | 5 | 0.1% |
|
43.2 | 5 | 0.1% |
|
27.75 | 5 | 0.1% |
|
Other values (736) | 1287 | 25.7% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 3656 | 73.1% |
|
12.7 | 1 | 0.0% |
|
14.0 | 1 | 0.0% |
|
14.55 | 1 | 0.0% |
|
14.7 | 2 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
119.1 | 1 | 0.0% |
|
130.75 | 1 | 0.0% |
|
134.45 | 1 | 0.0% |
|
165.15 | 1 | 0.0% |
|
186.25 | 1 | 0.0% |
|
wireten
Numeric
Distinct count | 1328 |
---|---|
Unique (%) | 26.6% |
Missing (%) | 0.0% |
Missing (n) | 0 |
Infinite (%) | 0.0% |
Infinite (n) | 0 |
Mean | 421.98 |
---|---|
Minimum | 0 |
Maximum | 12859 |
Zeros (%) | 73.1% |
Quantile statistics
Minimum | 0 |
---|---|
5-th percentile | 0 |
Q1 | 0 |
Median | 0 |
Q3 | 89.962 |
95-th percentile | 2687.9 |
Maximum | 12859 |
Range | 12859 |
Interquartile range | 89.962 |
Descriptive statistics
Standard deviation | 1001 |
---|---|
Coef of variation | 2.3721 |
Kurtosis | 15.464 |
Mean | 421.98 |
MAD | 646.82 |
Skewness | 3.3042 |
Sum | 2109900 |
Variance | 1002000 |
Memory size | 39.1 KiB |
Value | Count | Frequency (%) | |
0.0 | 3656 | 73.1% |
|
2182.05 | 2 | 0.0% |
|
2386.25 | 2 | 0.0% |
|
2323.8 | 2 | 0.0% |
|
1062.75 | 2 | 0.0% |
|
2049.85 | 2 | 0.0% |
|
1199.2 | 2 | 0.0% |
|
183.1 | 2 | 0.0% |
|
20.9 | 2 | 0.0% |
|
1073.95 | 2 | 0.0% |
|
Other values (1318) | 1326 | 26.5% |
|
Minimum 5 values
Value | Count | Frequency (%) | |
0.0 | 3656 | 73.1% |
|
12.7 | 1 | 0.0% |
|
14.55 | 1 | 0.0% |
|
14.6 | 1 | 0.0% |
|
14.9 | 2 | 0.0% |
|
Maximum 5 values
Value | Count | Frequency (%) | |
7217.1 | 1 | 0.0% |
|
8675.05 | 1 | 0.0% |
|
9039.8 | 1 | 0.0% |
|
9851.15 | 1 | 0.0% |
|
12858.65 | 1 | 0.0% |
|
custid | region | townsize | gender | age | agecat | birthmonth | ed | edcat | jobcat | union | employ | empcat | retire | income | lninc | inccat | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | default | jobsat | marital | spoused | spousedcat | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | homeown | hometype | address | addresscat | cars | carown | cartype | carvalue | carcatvalue | carbought | carbuy | commute | commutecat | commutetime | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardbenefit | cardfee | cardtenure | cardtenurecat | card2 | card2type | card2benefit | card2fee | card2tenure | card2tenurecat | cardspent | card2spent | active | bfast | tenure | churn | longmon | lnlongmon | longten | lnlongten | tollfree | tollmon | lntollmon | tollten | lntollten | equip | equipmon | lnequipmon | equipten | lnequipten | callcard | cardmon | lncardmon | cardten | lncardten | wireless | wiremon | lnwiremon | wireten | lnwireten | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | hourstv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | totalspend | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3964-QJWTRG-NPN | 1 | 2.0 | 1 | 20 | 2 | September | 15 | 3 | 1 | 1 | 0 | 1 | 0 | 31 | 3.433987 | 2 | 11.1 | 1.200909 | 0.183079 | 2.240091 | 0.806516 | 1 | 1 | 0 | -1 | -1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 1 | 0 | 14.3 | 1 | 0 | 0 | 8 | 4 | 22.0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 9 | 6 | 1 | 0 | 1 | 3 | 1 | 1 | 0 | 2 | 2 | 5 | 3 | 1 | 0 | 3 | 2 | 81.66 | 67.80 | 0 | 3 | 5 | 1 | 6.50 | 1.871802 | 34.40 | 3.538057 | 1 | 29.0 | 3.367296 | 161.05 | 5.081715 | 1 | 29.50 | 3.384390 | 126.1 | 4.837075 | 1 | 14.25 | 2.656757 | 60.0 | 4.094345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 13 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 149.46 |
1 | 0648-AIPJSP-UVM | 5 | 5.0 | 0 | 22 | 2 | May | 17 | 4 | 2 | 0 | 0 | 1 | 0 | 15 | 2.708050 | 1 | 18.6 | 1.222020 | 0.200505 | 1.567980 | 0.449788 | 1 | 1 | 0 | -1 | -1 | 2 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 | 3 | 2 | 1 | 2 | 1 | 1 | 6.8 | 1 | 0 | 0 | 1 | 1 | 29.0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 9 | 4 | 1 | 0 | 0 | 2 | 4 | 1 | 0 | 4 | 2 | 4 | 1 | 3 | 0 | 4 | 2 | 42.60 | 34.94 | 1 | 1 | 39 | 0 | 8.90 | 2.186051 | 330.60 | 5.800909 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 54.85 | 4.004602 | 1975.0 | 7.588324 | 1 | 16.00 | 2.772589 | 610.0 | 6.413459 | 1 | 45.65 | 3.821004 | 1683.55 | 7.428660 | 1 | 1 | 1 | 4 | 1 | 0 | 1 | 0 | 1 | 1 | 18 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 77.54 |
2 | 5195-TLUDJE-HVO | 3 | 4.0 | 1 | 67 | 6 | June | 14 | 2 | 2 | 0 | 16 | 5 | 0 | 35 | 3.555348 | 2 | 9.9 | 0.928620 | -0.074056 | 2.536380 | 0.930738 | 0 | 4 | 1 | 13 | 2 | 3 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 30 | 5 | 3 | 1 | 1 | 18.8 | 1 | 0 | 1 | 4 | 3 | 24.0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 5 | 1 | 0 | 0 | 2 | 1 | 4 | 0 | 35 | 5 | 4 | 1 | 3 | 0 | 25 | 5 | 184.22 | 175.75 | 0 | 3 | 65 | 0 | 28.40 | 3.346389 | 1858.35 | 7.527444 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 23.00 | 3.135494 | 1410.0 | 7.251345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 21 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 359.97 |
3 | 4459-VLPQUH-3OL | 4 | 3.0 | 0 | 23 | 2 | May | 16 | 3 | 2 | 0 | 0 | 1 | 0 | 20 | 2.995732 | 1 | 5.7 | 0.022800 | -3.780995 | 1.117200 | 0.110826 | 1 | 2 | 1 | 18 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 3 | 2 | 3 | 1 | 1 | 8.7 | 1 | 0 | 1 | 1 | 1 | 38.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 0 | 0 | 0 | 2 | 1 | 4 | 0 | 5 | 2 | 3 | 2 | 4 | 0 | 5 | 2 | 340.99 | 18.42 | 1 | 1 | 36 | 0 | 6.00 | 1.791759 | 199.45 | 5.295564 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 21.00 | 3.044522 | 685.0 | 6.529419 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 26 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 359.41 |
4 | 8158-SMTQFB-CNO | 2 | 2.0 | 0 | 26 | 3 | July | 16 | 3 | 2 | 0 | 1 | 1 | 0 | 23 | 3.135494 | 1 | 1.7 | 0.214659 | -1.538705 | 0.176341 | -1.735336 | 0 | 1 | 1 | 13 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 2 | 1 | 0 | 1 | 10.6 | 1 | 0 | 1 | 6 | 3 | 32.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 8 | 3 | 1 | 3 | 2 | 0 | 9 | 3 | 255.10 | 252.73 | 1 | 3 | 21 | 0 | 3.05 | 1.115142 | 74.10 | 4.305416 | 1 | 16.5 | 2.803360 | 387.70 | 5.960232 | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 17.25 | 2.847812 | 360.0 | 5.886104 | 1 | 19.05 | 2.947067 | 410.80 | 6.018106 | 0 | 1 | 0 | 3 | 1 | 1 | 1 | 1 | 0 | 1 | 27 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 507.83 |
# Drop cardspent (first card spend amount) and card2spent (second card spend amount) because they are not adding any value
# custid is also dropped because it is a unique identifier.
# NOTE(review): the original comment also listed "birthmonth", but birthmonth is NOT in the drop list below.
custdata_df.drop(["cardspent","card2spent","custid"],axis=1, inplace=True)
# Item count: need to drop primary and secondary item count
# Seaborn scatter plot of totalspend against income, with a fitted regression line
# Optional extra arguments you can also pass: aspect=1.5, scatter_kws={'alpha':0.2}
sns.lmplot(x='income', y='totalspend', data=custdata_df)
<seaborn.axisgrid.FacetGrid at 0xba09c88>
So we can see above that most data points are near the line, which means our data is normal and we can go ahead with further processing.
# Columns that this analysis treats as categorical variables.
categorical_columns = [
    'region', 'townsize', 'gender', 'agecat', 'edcat', 'birthmonth', 'jobcat', 'union', 'employ', 'empcat', 'retire',
    'inccat', 'default', 'jobsat', 'marital', 'spousedcat', 'homeown', 'hometype', 'address', 'addresscat', 'cars', 'carown',
    'cartype', 'carcatvalue', 'carbought', 'carbuy', 'commute', 'commutecat', 'commutecar',
    'commutemotorcycle', 'commutecarpool', 'commutebus', 'commuterail', 'commutepublic', 'commutebike', 'commutewalk',
    'commutenonmotor', 'telecommute', 'reason', 'polview', 'polparty', 'polcontrib', 'vote', 'card', 'cardtype', 'cardbenefit',
    'cardfee', 'cardtenure', 'cardtenurecat', 'card2', 'card2type', 'card2benefit', 'card2fee', 'card2tenure', 'card2tenurecat',
    'active', 'bfast', 'churn', 'tollfree', 'equip', 'callcard', 'wireless', 'multline', 'voice', 'pager', 'internet', 'callid',
    'callwait', 'forward', 'confer', 'ebill', 'owntv', 'ownvcr', 'owndvd', 'owncd', 'ownpda', 'ownpc', 'ownipod', 'owngame', 'ownfax',
    'news', 'response_01', 'response_02', 'response_03',
]

# Re-type each of them as 'object' so the dtype-based split that follows
# places them with the categorical variables.
for column in categorical_columns:
    custdata_df[column] = custdata_df[column].astype('object')
To do that, you need to apply a for loop along with an if condition.
# Split the column names of custdata_df by dtype.
# The column -> dtype mapping is built once and reused, instead of being
# rebuilt for every column inside the comprehensions.
column_dtypes = dict(custdata_df.dtypes)

# Numerical variables: this returns a list of column names
numeric_var_names = [name for name, dtype in column_dtypes.items()
                     if dtype in ['float64', 'int64', 'float32', 'int32']]

# Categorical variables: columns stored with the 'object' dtype
cat_var_names = [name for name, dtype in column_dtypes.items() if dtype in ['object']]

# Print both lists of column names
print(numeric_var_names)
print(cat_var_names)
['age', 'ed', 'income', 'lninc', 'debtinc', 'creddebt', 'lncreddebt', 'othdebt', 'lnothdebt', 'spoused', 'reside', 'pets', 'pets_cats', 'pets_dogs', 'pets_birds', 'pets_reptiles', 'pets_small', 'pets_saltfish', 'pets_freshfish', 'carvalue', 'commutetime', 'tenure', 'longmon', 'lnlongmon', 'longten', 'lnlongten', 'tollmon', 'lntollmon', 'tollten', 'lntollten', 'equipmon', 'lnequipmon', 'equipten', 'lnequipten', 'cardmon', 'lncardmon', 'cardten', 'lncardten', 'wiremon', 'lnwiremon', 'wireten', 'lnwireten', 'hourstv', 'totalspend'] ['region', 'townsize', 'gender', 'agecat', 'birthmonth', 'edcat', 'jobcat', 'union', 'employ', 'empcat', 'retire', 'inccat', 'default', 'jobsat', 'marital', 'spousedcat', 'homeown', 'hometype', 'address', 'addresscat', 'cars', 'carown', 'cartype', 'carcatvalue', 'carbought', 'carbuy', 'commute', 'commutecat', 'commutecar', 'commutemotorcycle', 'commutecarpool', 'commutebus', 'commuterail', 'commutepublic', 'commutebike', 'commutewalk', 'commutenonmotor', 'telecommute', 'reason', 'polview', 'polparty', 'polcontrib', 'vote', 'card', 'cardtype', 'cardbenefit', 'cardfee', 'cardtenure', 'cardtenurecat', 'card2', 'card2type', 'card2benefit', 'card2fee', 'card2tenure', 'card2tenurecat', 'active', 'bfast', 'churn', 'tollfree', 'equip', 'callcard', 'wireless', 'multline', 'voice', 'pager', 'internet', 'callid', 'callwait', 'forward', 'confer', 'ebill', 'owntv', 'ownvcr', 'owndvd', 'owncd', 'ownpda', 'ownpc', 'ownipod', 'owngame', 'ownfax', 'news', 'response_01', 'response_02', 'response_03']
# Select the columns named in numeric_var_names into their own DataFrame
# and preview the first five rows of the numeric variables of custdata_df
custdata_df_num=custdata_df[numeric_var_names]
custdata_df_num.head(5)
age | ed | income | lninc | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | spoused | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | carvalue | commutetime | tenure | longmon | lnlongmon | longten | lnlongten | tollmon | lntollmon | tollten | lntollten | equipmon | lnequipmon | equipten | lnequipten | cardmon | lncardmon | cardten | lncardten | wiremon | lnwiremon | wireten | lnwireten | hourstv | totalspend | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20 | 15 | 31 | 3.433987 | 11.1 | 1.200909 | 0.183079 | 2.240091 | 0.806516 | -1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 14.3 | 22.0 | 5 | 6.50 | 1.871802 | 34.40 | 3.538057 | 29.0 | 3.367296 | 161.05 | 5.081715 | 29.50 | 3.384390 | 126.1 | 4.837075 | 14.25 | 2.656757 | 60.0 | 4.094345 | 0.00 | NaN | 0.00 | NaN | 13 | 149.46 |
1 | 22 | 17 | 15 | 2.708050 | 18.6 | 1.222020 | 0.200505 | 1.567980 | 0.449788 | -1 | 2 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 6.8 | 29.0 | 39 | 8.90 | 2.186051 | 330.60 | 5.800909 | 0.0 | NaN | 0.00 | NaN | 54.85 | 4.004602 | 1975.0 | 7.588324 | 16.00 | 2.772589 | 610.0 | 6.413459 | 45.65 | 3.821004 | 1683.55 | 7.428660 | 18 | 77.54 |
2 | 67 | 14 | 35 | 3.555348 | 9.9 | 0.928620 | -0.074056 | 2.536380 | 0.930738 | 13 | 3 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 18.8 | 24.0 | 65 | 28.40 | 3.346389 | 1858.35 | 7.527444 | 0.0 | NaN | 0.00 | NaN | 0.00 | NaN | 0.0 | NaN | 23.00 | 3.135494 | 1410.0 | 7.251345 | 0.00 | NaN | 0.00 | NaN | 21 | 359.97 |
3 | 23 | 16 | 20 | 2.995732 | 5.7 | 0.022800 | -3.780995 | 1.117200 | 0.110826 | 18 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 8.7 | 38.0 | 36 | 6.00 | 1.791759 | 199.45 | 5.295564 | 0.0 | NaN | 0.00 | NaN | 0.00 | NaN | 0.0 | NaN | 21.00 | 3.044522 | 685.0 | 6.529419 | 0.00 | NaN | 0.00 | NaN | 26 | 359.41 |
4 | 26 | 16 | 23 | 3.135494 | 1.7 | 0.214659 | -1.538705 | 0.176341 | -1.735336 | 13 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10.6 | 32.0 | 21 | 3.05 | 1.115142 | 74.10 | 4.305416 | 16.5 | 2.803360 | 387.70 | 5.960232 | 0.00 | NaN | 0.0 | NaN | 17.25 | 2.847812 | 360.0 | 5.886104 | 19.05 | 2.947067 | 410.80 | 6.018106 | 27 | 507.83 |
# Select the columns named in cat_var_names into their own DataFrame
# and preview the first five rows of the categorical variables of custdata_df
custdata_df_cat=custdata_df[cat_var_names]
custdata_df_cat.head(5)
region | townsize | gender | agecat | birthmonth | edcat | jobcat | union | employ | empcat | retire | inccat | default | jobsat | marital | spousedcat | homeown | hometype | address | addresscat | cars | carown | cartype | carcatvalue | carbought | carbuy | commute | commutecat | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardbenefit | cardfee | cardtenure | cardtenurecat | card2 | card2type | card2benefit | card2fee | card2tenure | card2tenurecat | active | bfast | churn | tollfree | equip | callcard | wireless | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | 1 | 2 | September | 3 | 1 | 1 | 0 | 1 | 0 | 2 | 1 | 1 | 0 | -1 | 0 | 2 | 0 | 1 | 2 | 1 | 0 | 1 | 0 | 0 | 8 | 4 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 9 | 6 | 1 | 0 | 1 | 3 | 1 | 1 | 0 | 2 | 2 | 5 | 3 | 1 | 0 | 3 | 2 | 0 | 3 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
1 | 5 | 5 | 0 | 2 | May | 4 | 2 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | -1 | 1 | 3 | 2 | 1 | 2 | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 9 | 4 | 1 | 0 | 0 | 2 | 4 | 1 | 0 | 4 | 2 | 4 | 1 | 3 | 0 | 4 | 2 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 4 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
2 | 3 | 4 | 1 | 6 | June | 2 | 2 | 0 | 16 | 5 | 0 | 2 | 0 | 4 | 1 | 2 | 1 | 1 | 30 | 5 | 3 | 1 | 1 | 1 | 0 | 1 | 4 | 3 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 5 | 1 | 0 | 0 | 2 | 1 | 4 | 0 | 35 | 5 | 4 | 1 | 3 | 0 | 25 | 5 | 0 | 3 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
3 | 4 | 3 | 0 | 2 | May | 3 | 2 | 0 | 0 | 1 | 0 | 1 | 1 | 2 | 1 | 4 | 1 | 3 | 3 | 2 | 3 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 0 | 0 | 0 | 2 | 1 | 4 | 0 | 5 | 2 | 3 | 2 | 4 | 0 | 5 | 2 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 |
4 | 2 | 2 | 0 | 3 | July | 3 | 2 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 2 | 0 | 2 | 3 | 2 | 1 | 0 | 1 | 1 | 0 | 1 | 6 | 3 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 8 | 3 | 1 | 3 | 2 | 0 | 9 | 3 | 1 | 3 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 3 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
# Use a general function that returns multiple values
def var_summary(x):
    """Return descriptive statistics for one numeric column.

    Parameters
    ----------
    x : pandas.Series
        The column to summarise.

    Returns
    -------
    pandas.Series
        Indexed by 'N', 'NMISS', 'SUM', 'MEAN', 'MEDIAN', 'STD', 'VAR',
        'MIN', the percentiles 'P1' ... 'P99', and 'MAX'.
    """
    # Drop missing values once and compute all percentiles in a single call,
    # rather than calling x.dropna() separately for every percentile.
    non_missing = x.dropna()
    percentiles = non_missing.quantile(
        [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99])

    return pd.Series(
        [x.count(), x.isnull().sum(), x.sum(), x.mean(), x.median(),
         x.std(), x.var(), x.min(), *percentiles.tolist(), x.max()],
        index=['N', 'NMISS', 'SUM', 'MEAN', 'MEDIAN', 'STD', 'VAR', 'MIN',
               'P1', 'P5', 'P10', 'P25', 'P50', 'P75', 'P90', 'P95', 'P99',
               'MAX'])
# Summarise every numeric column, then transpose so each variable is one row.
num_summary = custdata_df_num.apply(var_summary).T
num_summary
N | NMISS | SUM | MEAN | MEDIAN | STD | VAR | MIN | P1 | P5 | P10 | P25 | P50 | P75 | P90 | P95 | P99 | MAX | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
age | 5000.0 | 0.0 | 2.351280e+05 | 47.025600 | 47.000000 | 17.770338 | 3.157849e+02 | 18.000000 | 18.000000 | 20.000000 | 23.000000 | 31.000000 | 47.000000 | 62.000000 | 72.000000 | 76.000000 | 79.000000 | 79.000000 |
ed | 5000.0 | 0.0 | 7.271500e+04 | 14.543000 | 14.000000 | 3.281083 | 1.076550e+01 | 6.000000 | 8.000000 | 9.000000 | 10.000000 | 12.000000 | 14.000000 | 17.000000 | 19.000000 | 20.000000 | 21.000000 | 23.000000 |
income | 5000.0 | 0.0 | 2.737980e+05 | 54.759600 | 38.000000 | 55.377511 | 3.066669e+03 | 9.000000 | 9.000000 | 13.000000 | 16.000000 | 24.000000 | 38.000000 | 67.000000 | 109.100000 | 147.000000 | 272.010000 | 1073.000000 |
lninc | 5000.0 | 0.0 | 1.849955e+04 | 3.699909 | 3.637586 | 0.747072 | 5.581164e-01 | 2.197225 | 2.197225 | 2.564949 | 2.772589 | 3.178054 | 3.637586 | 4.204693 | 4.692261 | 4.990433 | 5.605839 | 6.978214 |
debtinc | 5000.0 | 0.0 | 4.977080e+04 | 9.954160 | 8.800000 | 6.399783 | 4.095723e+01 | 0.000000 | 0.700000 | 1.900000 | 2.800000 | 5.100000 | 8.800000 | 13.600000 | 18.600000 | 22.200000 | 29.200000 | 43.100000 |
creddebt | 5000.0 | 0.0 | 9.286628e+03 | 1.857326 | 0.926437 | 3.415732 | 1.166722e+01 | 0.000000 | 0.033160 | 0.101088 | 0.175682 | 0.385520 | 0.926437 | 2.063820 | 4.299470 | 6.373010 | 14.280358 | 109.072596 |
lncreddebt | 4999.0 | 1.0 | -6.521372e+02 | -0.130454 | -0.076106 | 1.273058 | 1.620678e+00 | -6.597334 | -3.401690 | -2.291604 | -1.737842 | -0.952685 | -0.076106 | 0.724665 | 1.458625 | 1.852297 | 2.658910 | 4.692014 |
othdebt | 5000.0 | 0.0 | 1.827230e+04 | 3.654460 | 2.098540 | 5.395172 | 2.910788e+01 | 0.000000 | 0.114299 | 0.287692 | 0.457997 | 0.980301 | 2.098540 | 4.314780 | 8.062046 | 11.815981 | 24.064260 | 141.459150 |
lnothdebt | 4999.0 | 1.0 | 3.483879e+03 | 0.696915 | 0.741537 | 1.128578 | 1.273689e+00 | -4.092107 | -2.168241 | -1.243483 | -0.780312 | -0.018987 | 0.741537 | 1.462053 | 2.087178 | 2.469586 | 3.180802 | 4.952011 |
spoused | 5000.0 | 0.0 | 3.056400e+04 | 6.112800 | -1.000000 | 7.743518 | 5.996207e+01 | -1.000000 | -1.000000 | -1.000000 | -1.000000 | -1.000000 | -1.000000 | 14.000000 | 16.000000 | 18.000000 | 20.000000 | 24.000000 |
reside | 5000.0 | 0.0 | 1.102000e+04 | 2.204000 | 2.000000 | 1.393977 | 1.943173e+00 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 1.000000 | 2.000000 | 3.000000 | 4.000000 | 5.000000 | 6.000000 | 9.000000 |
pets | 5000.0 | 0.0 | 1.533700e+04 | 3.067400 | 2.000000 | 3.414497 | 1.165879e+01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 5.000000 | 8.000000 | 10.000000 | 13.000000 | 21.000000 |
pets_cats | 5000.0 | 0.0 | 2.502000e+03 | 0.500400 | 0.000000 | 0.860783 | 7.409480e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 2.000000 | 2.000000 | 3.000000 | 6.000000 |
pets_dogs | 5000.0 | 0.0 | 1.962000e+03 | 0.392400 | 0.000000 | 0.796084 | 6.337490e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 2.000000 | 3.000000 | 7.000000 |
pets_birds | 5000.0 | 0.0 | 5.520000e+02 | 0.110400 | 0.000000 | 0.494227 | 2.442607e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 3.000000 | 5.000000 |
pets_reptiles | 5000.0 | 0.0 | 2.780000e+02 | 0.055600 | 0.000000 | 0.325776 | 1.061299e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 6.000000 |
pets_small | 5000.0 | 0.0 | 5.730000e+02 | 0.114600 | 0.000000 | 0.568798 | 3.235315e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 1.000000 | 3.000000 | 7.000000 |
pets_saltfish | 5000.0 | 0.0 | 2.330000e+02 | 0.046600 | 0.000000 | 0.469545 | 2.204725e-01 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 2.000000 | 8.000000 |
pets_freshfish | 5000.0 | 0.0 | 9.237000e+03 | 1.847400 | 0.000000 | 3.074801 | 9.454404e+00 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 4.000000 | 7.000000 | 8.000000 | 11.000000 | 16.000000 |
carvalue | 5000.0 | 0.0 | 1.161629e+05 | 23.232580 | 17.000000 | 21.231637 | 4.507824e+02 | -1.000000 | -1.000000 | -1.000000 | 2.490000 | 9.200000 | 17.000000 | 31.100000 | 52.910000 | 72.000000 | 92.001000 | 99.600000 |
commutetime | 4998.0 | 2.0 | 1.266770e+05 | 25.345538 | 25.000000 | 5.879149 | 3.456439e+01 | 8.000000 | 13.000000 | 16.000000 | 18.000000 | 21.000000 | 25.000000 | 29.000000 | 33.000000 | 35.000000 | 40.030000 | 48.000000 |
tenure | 5000.0 | 0.0 | 1.910240e+05 | 38.204800 | 38.000000 | 22.661888 | 5.135612e+02 | 0.000000 | 1.000000 | 4.000000 | 7.000000 | 18.000000 | 38.000000 | 59.000000 | 69.000000 | 72.000000 | 72.000000 | 72.000000 |
longmon | 5000.0 | 0.0 | 6.735725e+04 | 13.471450 | 9.550000 | 12.773381 | 1.631593e+02 | 0.900000 | 1.850000 | 2.900000 | 3.700000 | 5.700000 | 9.550000 | 16.550000 | 27.000000 | 36.757500 | 65.201000 | 179.850000 |
lnlongmon | 5000.0 | 0.0 | 1.144390e+04 | 2.288779 | 2.256541 | 0.775178 | 6.009008e-01 | -0.105361 | 0.615186 | 1.064711 | 1.308333 | 1.740466 | 2.256541 | 2.806386 | 3.295837 | 3.604342 | 4.177475 | 5.192123 |
longten | 4997.0 | 3.0 | 3.542232e+06 | 708.871753 | 350.000000 | 979.291072 | 9.590110e+05 | 0.900000 | 2.400000 | 12.620000 | 28.290000 | 104.600000 | 350.000000 | 913.850000 | 1808.840000 | 2567.650000 | 4689.066000 | 13046.500000 |
lnlongten | 4997.0 | 3.0 | 2.803966e+04 | 5.611298 | 5.857933 | 1.649308 | 2.720218e+00 | -0.105361 | 0.875469 | 2.535272 | 3.342505 | 4.650144 | 5.857933 | 6.817666 | 7.500441 | 7.850745 | 8.452988 | 9.476275 |
tollmon | 5000.0 | 0.0 | 6.632225e+04 | 13.264450 | 0.000000 | 16.310018 | 2.660167e+02 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 24.500000 | 35.500000 | 43.500000 | 58.752500 | 173.000000 |
lntollmon | 2378.0 | 2622.0 | 7.712400e+03 | 3.243230 | 3.228826 | 0.404659 | 1.637486e-01 | 2.079442 | 2.345833 | 2.583998 | 2.740840 | 2.970414 | 3.228826 | 3.518980 | 3.789855 | 3.926912 | 4.190524 | 4.622519 |
tollten | 5000.0 | 0.0 | 2.889163e+06 | 577.832510 | 0.000000 | 949.151586 | 9.008887e+05 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 885.450000 | 1918.955000 | 2620.212500 | 3977.270500 | 6923.450000 |
lntollten | 2378.0 | 2622.0 | 1.565861e+04 | 6.584783 | 6.858013 | 1.222040 | 1.493382e+00 | 2.169054 | 2.784527 | 4.208490 | 4.821207 | 5.912218 | 6.858013 | 7.459900 | 7.882781 | 8.106642 | 8.429812 | 8.842669 |
equipmon | 5000.0 | 0.0 | 6.495655e+04 | 12.991310 | 0.000000 | 19.212943 | 3.691372e+02 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 30.800000 | 42.700000 | 49.052500 | 63.300500 | 106.300000 |
lnequipmon | 1704.0 | 3296.0 | 6.134805e+03 | 3.600238 | 3.598681 | 0.283385 | 8.030686e-02 | 2.833213 | 2.967847 | 3.139833 | 3.234749 | 3.412797 | 3.598681 | 3.790138 | 3.971235 | 4.065473 | 4.269466 | 4.666265 |
equipten | 5000.0 | 0.0 | 2.350882e+06 | 470.176400 | 0.000000 | 912.220624 | 8.321465e+05 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 510.162500 | 1912.325000 | 2600.990000 | 3679.457500 | 6525.300000 |
lnequipten | 1704.0 | 3296.0 | 1.149739e+04 | 6.747296 | 7.050556 | 1.199234 | 1.438161e+00 | 2.489065 | 3.169937 | 4.251230 | 5.033244 | 6.171570 | 7.050556 | 7.649835 | 7.977833 | 8.117631 | 8.369037 | 8.783442 |
cardmon | 5000.0 | 0.0 | 7.721925e+04 | 15.443850 | 13.750000 | 15.007569 | 2.252271e+02 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 13.750000 | 22.750000 | 34.000000 | 42.000000 | 64.250000 | 188.500000 |
lncardmon | 3581.0 | 1419.0 | 1.041975e+04 | 2.909733 | 2.904165 | 0.564859 | 3.190658e-01 | 1.178655 | 1.658228 | 1.981001 | 2.169054 | 2.545531 | 2.904165 | 3.295837 | 3.637586 | 3.839452 | 4.239162 | 5.239098 |
cardten | 4998.0 | 2.0 | 3.600951e+06 | 720.478391 | 425.000000 | 922.225527 | 8.504999e+05 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 425.000000 | 1080.000000 | 1871.500000 | 2455.750000 | 4011.200000 | 13705.000000 |
lncardten | 3578.0 | 1422.0 | 2.299333e+04 | 6.426309 | 6.639876 | 1.172050 | 1.373700e+00 | 1.558145 | 2.484907 | 4.094345 | 4.941642 | 5.857933 | 6.639876 | 7.218910 | 7.673223 | 7.923257 | 8.392151 | 9.525516 |
wiremon | 5000.0 | 0.0 | 5.350595e+04 | 10.701190 | 0.000000 | 19.799837 | 3.920335e+02 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 20.962500 | 40.860000 | 51.305000 | 78.304000 | 186.250000 |
lnwiremon | 1344.0 | 3656.0 | 4.845121e+03 | 3.605001 | 3.597997 | 0.390102 | 1.521793e-01 | 2.541602 | 2.806811 | 2.992964 | 3.118613 | 3.330417 | 3.597997 | 3.865193 | 4.102643 | 4.267282 | 4.577186 | 5.227090 |
wireten | 5000.0 | 0.0 | 2.109923e+06 | 421.984610 | 0.000000 | 1001.003287 | 1.002008e+06 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 89.962500 | 1778.535000 | 2687.922500 | 4530.186000 | 12858.650000 |
lnwireten | 1344.0 | 3656.0 | 9.150129e+03 | 6.808132 | 7.147185 | 1.283967 | 1.648571e+00 | 2.541602 | 3.039749 | 4.114134 | 4.881065 | 6.158091 | 7.147185 | 7.755376 | 8.106616 | 8.310817 | 8.690117 | 9.461772 |
hourstv | 5000.0 | 0.0 | 9.822500e+04 | 19.645000 | 20.000000 | 5.165609 | 2.668351e+01 | 0.000000 | 0.000000 | 12.000000 | 14.000000 | 17.000000 | 20.000000 | 23.000000 | 26.000000 | 28.000000 | 31.000000 | 36.000000 |
totalspend | 5000.0 | 0.0 | 2.490393e+06 | 498.078630 | 414.250000 | 351.529270 | 1.235728e+05 | 8.110000 | 58.197600 | 133.106000 | 184.033000 | 276.282500 | 414.250000 | 615.562500 | 908.125000 | 1145.146500 | 1760.102400 | 4881.050000 |
def cat_summary(x):
    """Summarise one categorical column.

    Returns a Series holding the non-missing count ('N'), the missing count
    ('NMISS'), the per-value frequency table ('ColumnNames') and the array
    of distinct values ('UniqueValues').
    """
    labels = ['N', 'NMISS', 'ColumnNames', 'UniqueValues']
    non_missing = x.count()
    missing = x.isnull().sum()
    frequencies = x.value_counts()
    distinct = x.unique()
    return pd.Series([non_missing, missing, frequencies, distinct], index=labels)
# Summarise every categorical column, then transpose so each variable is one row.
# NOTE: the result is stored under the name `cat_summary`, so once this line
# has run the name refers to the summary table and no longer to the function.
cat_summary = custdata_df_cat.apply(cat_summary).T
cat_summary
N | NMISS | ColumnNames | UniqueValues | |
---|---|---|---|---|
region | 5000 | 0 | 5 1027 1 1009 3 1003 2 995 4 ... | [1, 5, 3, 4, 2] |
townsize | 4998 | 2 | 1.0 1436 2.0 1048 3.0 907 4.0 85... | [2.0, 5.0, 4.0, 3.0, 1.0, nan] |
gender | 5000 | 0 | 1 2518 0 2482 Name: gender, dtype: int64 | [1, 0] |
agecat | 5000 | 0 | 4 1222 5 1195 6 1068 3 893 2 ... | [2, 6, 3, 5, 4] |
birthmonth | 5000 | 0 | September 458 May 451 June ... | [September, May, June, July, August, October, ... |
edcat | 5000 | 0 | 2 1567 4 1111 3 1022 1 946 5 ... | [3, 4, 2, 1, 5] |
jobcat | 5000 | 0 | 2 1640 1 1388 6 688 3 620 5 ... | [1, 2, 3, 6, 4, 5] |
union | 5000 | 0 | 0 4244 1 756 Name: union, dtype: int64 | [1, 0] |
employ | 5000 | 0 | 0 659 1 389 2 318 3 309 4 ... | [0, 16, 1, 22, 10, 11, 15, 19, 8, 4, 12, 3, 27... |
empcat | 5000 | 0 | 2 1180 5 1135 1 1048 3 968 4 ... | [1, 5, 3, 4, 2] |
retire | 5000 | 0 | 0 4262 1 738 Name: retire, dtype: int64 | [0, 1] |
inccat | 5000 | 0 | 2 1797 1 1330 3 839 4 650 5 ... | [2, 1, 4, 3, 5] |
default | 5000 | 0 | 0 3829 1 1171 Name: default, dtype: int64 | [1, 0] |
jobsat | 5000 | 0 | 3 1085 2 1031 4 1016 1 975 5 ... | [1, 4, 2, 5, 3] |
marital | 5000 | 0 | 0 2599 1 2401 Name: marital, dtype: int64 | [0, 1] |
spousedcat | 5000 | 0 | -1 2599 2 789 1 606 3 507 4... | [-1, 2, 4, 3, 1, 5] |
homeown | 5000 | 0 | 1 3148 0 1852 Name: homeown, dtype: int64 | [0, 1] |
hometype | 5000 | 0 | 1 2265 2 1548 3 896 4 291 Name: ... | [2, 3, 1, 4] |
address | 5000 | 0 | 0 245 2 196 4 195 5 177 3 ... | [0, 2, 30, 3, 31, 21, 20, 19, 14, 5, 9, 32, 29... |
addresscat | 5000 | 0 | 3 1221 5 1157 4 1139 2 873 1 ... | [1, 5, 2, 4, 3] |
cars | 5000 | 0 | 2 1607 1 1119 3 1082 0 497 4 ... | [2, 3, 1, 0, 4, 5, 7, 6, 8] |
carown | 5000 | 0 | 1 3704 0 799 -1 497 Name: carown,... | [1, 0, -1] |
cartype | 5000 | 0 | 0 2287 1 2216 -1 497 Name: cartype... | [0, 1, -1] |
carcatvalue | 5000 | 0 | 1 2399 2 1267 3 837 -1 497 Na... | [1, -1, 2, 3] |
carbought | 5000 | 0 | 0 2901 1 1602 -1 497 Name: carboug... | [0, -1, 1] |
carbuy | 5000 | 0 | 0 3195 1 1805 Name: carbuy, dtype: int64 | [0, 1] |
commute | 5000 | 0 | 1 2855 4 635 8 585 5 302 3 ... | [8, 1, 4, 6, 5, 3, 10, 2, 7, 9] |
commutecat | 5000 | 0 | 1 2905 3 981 4 666 2 295 5 ... | [4, 1, 3, 2, 5] |
commutecar | 5000 | 0 | 1 3395 0 1605 Name: commutecar, dtype: i... | [0, 1] |
commutemotorcycle | 5000 | 0 | 0 4487 1 513 Name: commutemotorcycle, d... | [1, 0] |
... | ... | ... | ... | ... |
card2tenurecat | 5000 | 0 | 5 1923 2 1019 3 933 4 760 1 ... | [2, 5, 3, 1, 4] |
active | 5000 | 0 | 0 2670 1 2330 Name: active, dtype: int64 | [0, 1] |
bfast | 5000 | 0 | 3 1875 1 1582 2 1543 Name: bfast, dty... | [3, 1, 2] |
churn | 5000 | 0 | 0 3734 1 1266 Name: churn, dtype: int64 | [1, 0] |
tollfree | 5000 | 0 | 0 2622 1 2378 Name: tollfree, dtype: int64 | [1, 0] |
equip | 5000 | 0 | 0 3296 1 1704 Name: equip, dtype: int64 | [1, 0] |
callcard | 5000 | 0 | 1 3581 0 1419 Name: callcard, dtype: int64 | [1, 0] |
wireless | 5000 | 0 | 0 3656 1 1344 Name: wireless, dtype: int64 | [0, 1] |
multline | 5000 | 0 | 0 2558 1 2442 Name: multline, dtype: int64 | [1, 0] |
voice | 5000 | 0 | 0 3485 1 1515 Name: voice, dtype: int64 | [1, 0] |
pager | 5000 | 0 | 0 3782 1 1218 Name: pager, dtype: int64 | [1, 0] |
internet | 5000 | 0 | 0 2498 1 774 3 598 4 585 2 ... | [0, 4, 2, 3, 1] |
callid | 5000 | 0 | 0 2624 1 2376 Name: callid, dtype: int64 | [0, 1] |
callwait | 5000 | 0 | 0 2605 1 2395 Name: callwait, dtype: int64 | [1, 0] |
forward | 5000 | 0 | 0 2597 1 2403 Name: forward, dtype: int64 | [1, 0] |
confer | 5000 | 0 | 0 2610 1 2390 Name: confer, dtype: int64 | [1, 0] |
ebill | 5000 | 0 | 0 3257 1 1743 Name: ebill, dtype: int64 | [0, 1] |
owntv | 5000 | 0 | 1 4915 0 85 Name: owntv, dtype: int64 | [1, 0] |
ownvcr | 5000 | 0 | 1 4578 0 422 Name: ownvcr, dtype: int64 | [1, 0] |
owndvd | 5000 | 0 | 1 4568 0 432 Name: owndvd, dtype: int64 | [1, 0] |
owncd | 5000 | 0 | 1 4664 0 336 Name: owncd, dtype: int64 | [0, 1] |
ownpda | 5000 | 0 | 0 3995 1 1005 Name: ownpda, dtype: int64 | [0, 1] |
ownpc | 5000 | 0 | 1 3164 0 1836 Name: ownpc, dtype: int64 | [0, 1] |
ownipod | 5000 | 0 | 0 2604 1 2396 Name: ownipod, dtype: int64 | [1, 0] |
owngame | 5000 | 0 | 0 2626 1 2374 Name: owngame, dtype: int64 | [1, 0] |
ownfax | 5000 | 0 | 0 4106 1 894 Name: ownfax, dtype: int64 | [0, 1] |
news | 5000 | 0 | 0 2637 1 2363 Name: news, dtype: int64 | [0, 1] |
response_01 | 5000 | 0 | 0 4582 1 418 Name: response_01, dtype: ... | [0, 1] |
response_02 | 5000 | 0 | 0 4351 1 649 Name: response_02, dtype: ... | [1, 0] |
response_03 | 5000 | 0 | 0 4487 1 513 Name: response_03, dtype: ... | [0, 1] |
84 rows × 4 columns
#Handling Outliers for numerical data - Through function
def outlier_capping(x):
    """Cap the extreme values of a numeric Series at its 1st/99th percentiles.

    Values above the 99th percentile are first replaced by that percentile;
    values below the 1st percentile of the already-capped data are then
    replaced by that percentile.

    Parameters
    ----------
    x : pandas.Series
        Numeric column to treat.

    Returns
    -------
    pandas.Series
        A new Series of the same length and index with both tails capped.
    """
    # Series.clip_upper / Series.clip_lower were removed in pandas 1.0;
    # Series.clip(lower=..., upper=...) is the supported equivalent.
    capped = x.clip(upper=x.quantile(0.99))
    # The lower bound is taken from the capped data, matching the original
    # two-step order of operations.
    return capped.clip(lower=capped.quantile(0.01))
# Cap outliers in every numerical column.
custdata_df_num = custdata_df_num.apply(outlier_capping)
# Handling missing values in the numerical data - via a function (so that every column is treated in one shot)
def Missing_imputation(x):
    """Return a copy of ``x`` whose missing entries are replaced by its mean.

    Parameters
    ----------
    x : pandas.Series
        Numeric column that may contain missing values.

    Returns
    -------
    pandas.Series
        New Series with every missing entry set to the mean of ``x``.
    """
    column_mean = x.mean()
    return x.fillna(column_mean)
# Impute missing values in every numerical column.
custdata_df_num = custdata_df_num.apply(Missing_imputation)
# Handling missing values in the categorical data - via a function (so that every column is treated in one shot)
def Missing_imputation(x):
    """Return a copy of ``x`` whose missing entries are replaced by its mode.

    Parameters
    ----------
    x : pandas.Series
        Categorical column that may contain missing values.

    Returns
    -------
    pandas.Series
        New Series with every missing entry set to the most frequent value
        of ``x``. When several values tie, the first one reported by
        ``Series.mode`` is used. A column with no non-missing values is
        returned as an unchanged copy.
    """
    modes = x.mode()
    if modes.empty:
        # Nothing to impute with: the column holds no non-missing values.
        return x.copy()
    # Series.mode() returns a Series; passing that to fillna would align on
    # index labels and leave most gaps unfilled, so fill with the scalar.
    return x.fillna(modes.iloc[0])
# Impute missing values in every categorical column.
custdata_df_cat = custdata_df_cat.apply(Missing_imputation)
# Report the total number of missing values left in the numerical frame
# (custdata_df_num) and in the categorical frame (custdata_df_cat).
print ("\nMissing values in numerical data : ", custdata_df_num.isnull().sum().values.sum())
# The categorical count must come from custdata_df_cat; the numerical frame
# was previously reported twice.
print ("\nMissing values in categorical data : ", custdata_df_cat.isnull().sum().values.sum())
Missing values in numerical data : 0 Missing values in categorical data : 0
# We need to create a function for dummy creation
def create_dummies(df, colname):
    """Replace one column of ``df`` by its dummy (indicator) columns.

    Indicator columns are named ``<colname>_<level>``. The first indicator
    column is dropped, so a column with k levels yields k-1 indicators.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the column to encode; it is not modified.
    colname : str
        Name of the column to encode.

    Returns
    -------
    pandas.DataFrame
        New frame holding the remaining columns of ``df`` followed by the
        indicator columns.
    """
    indicators = pd.get_dummies(df[colname], prefix=colname)
    # Drop the first level so it acts as the reference category.
    indicators = indicators.drop(indicators.columns[0], axis=1)
    remaining = df.drop(colname, axis=1)
    return pd.concat([remaining, indicators], axis=1)
# Select the object-typed columns of the raw frame; these are treated as
# categorical features.
# NOTE(review): this re-selects from custdata_df, so changes made earlier to
# custdata_df_cat (e.g. missing-value imputation) appear to be discarded
# here - confirm this is intended.
custdata_df_cat = custdata_df.select_dtypes(include=['object'])
cat_varlist = custdata_df_cat.columns.tolist()
# Replace each categorical column, one at a time, by its dummy indicators.
for c_feature in cat_varlist:
    custdata_df_cat[c_feature] = custdata_df_cat[c_feature].astype('category')
    custdata_df_cat = create_dummies(custdata_df_cat, c_feature)
# Preview five random rows of the encoded frame.
custdata_df_cat.sample(5)
region_2 | region_3 | region_4 | region_5 | townsize_2.0 | townsize_3.0 | townsize_4.0 | townsize_5.0 | gender_1 | agecat_3 | agecat_4 | agecat_5 | agecat_6 | birthmonth_August | birthmonth_December | birthmonth_February | birthmonth_January | birthmonth_July | birthmonth_June | birthmonth_March | birthmonth_May | birthmonth_November | birthmonth_October | birthmonth_September | edcat_2 | edcat_3 | edcat_4 | edcat_5 | jobcat_2 | jobcat_3 | jobcat_4 | jobcat_5 | jobcat_6 | union_1 | employ_1 | employ_2 | employ_3 | employ_4 | employ_5 | employ_6 | employ_7 | employ_8 | employ_9 | employ_10 | employ_11 | employ_12 | employ_13 | employ_14 | employ_15 | employ_16 | employ_17 | employ_18 | employ_19 | employ_20 | employ_21 | employ_22 | employ_23 | employ_24 | employ_25 | employ_26 | employ_27 | employ_28 | employ_29 | employ_30 | employ_31 | employ_32 | employ_33 | employ_34 | employ_35 | employ_36 | employ_37 | employ_38 | employ_39 | employ_40 | employ_41 | employ_42 | employ_43 | employ_44 | employ_45 | employ_46 | employ_47 | employ_48 | employ_49 | employ_51 | employ_52 | empcat_2 | empcat_3 | empcat_4 | empcat_5 | retire_1 | inccat_2 | inccat_3 | inccat_4 | inccat_5 | default_1 | jobsat_2 | jobsat_3 | jobsat_4 | jobsat_5 | marital_1 | spousedcat_1 | spousedcat_2 | spousedcat_3 | spousedcat_4 | spousedcat_5 | homeown_1 | hometype_2 | hometype_3 | hometype_4 | address_1 | address_2 | address_3 | address_4 | address_5 | address_6 | address_7 | address_8 | address_9 | address_10 | address_11 | address_12 | address_13 | address_14 | address_15 | address_16 | address_17 | address_18 | address_19 | address_20 | address_21 | address_22 | address_23 | address_24 | address_25 | address_26 | address_27 | address_28 | address_29 | address_30 | address_31 | address_32 | address_33 | address_34 | address_35 | address_36 | address_37 | address_38 | address_39 | address_40 | address_41 | address_42 | address_43 | address_44 | address_45 | address_46 | address_47 | address_48 | 
address_49 | address_50 | address_51 | address_52 | address_53 | address_54 | address_55 | address_57 | addresscat_2 | addresscat_3 | addresscat_4 | addresscat_5 | cars_1 | cars_2 | cars_3 | cars_4 | cars_5 | cars_6 | cars_7 | cars_8 | carown_0 | carown_1 | cartype_0 | cartype_1 | carcatvalue_1 | carcatvalue_2 | carcatvalue_3 | carbought_0 | carbought_1 | carbuy_1 | commute_2 | commute_3 | commute_4 | commute_5 | commute_6 | commute_7 | commute_8 | commute_9 | commute_10 | commutecat_2 | commutecat_3 | commutecat_4 | commutecat_5 | commutecar_1 | commutemotorcycle_1 | commutecarpool_1 | commutebus_1 | commuterail_1 | commutepublic_1 | commutebike_1 | commutewalk_1 | commutenonmotor_1 | telecommute_1 | reason_2 | reason_3 | reason_4 | reason_9 | polview_2 | polview_3 | polview_4 | polview_5 | polview_6 | polview_7 | polparty_1 | polcontrib_1 | vote_1 | card_2 | card_3 | card_4 | card_5 | cardtype_2 | cardtype_3 | cardtype_4 | cardbenefit_2 | cardbenefit_3 | cardbenefit_4 | cardfee_1 | cardtenure_1 | cardtenure_2 | cardtenure_3 | cardtenure_4 | cardtenure_5 | cardtenure_6 | cardtenure_7 | cardtenure_8 | cardtenure_9 | cardtenure_10 | cardtenure_11 | cardtenure_12 | cardtenure_13 | cardtenure_14 | cardtenure_15 | cardtenure_16 | cardtenure_17 | cardtenure_18 | cardtenure_19 | cardtenure_20 | cardtenure_21 | cardtenure_22 | cardtenure_23 | cardtenure_24 | cardtenure_25 | cardtenure_26 | cardtenure_27 | cardtenure_28 | cardtenure_29 | cardtenure_30 | cardtenure_31 | cardtenure_32 | cardtenure_33 | cardtenure_34 | cardtenure_35 | cardtenure_36 | cardtenure_37 | cardtenure_38 | cardtenure_39 | cardtenure_40 | cardtenurecat_2 | cardtenurecat_3 | cardtenurecat_4 | cardtenurecat_5 | card2_2 | card2_3 | card2_4 | card2_5 | card2type_2 | card2type_3 | card2type_4 | card2benefit_2 | card2benefit_3 | card2benefit_4 | card2fee_1 | card2tenure_1 | card2tenure_2 | card2tenure_3 | card2tenure_4 | card2tenure_5 | card2tenure_6 | card2tenure_7 | card2tenure_8 | card2tenure_9 | 
card2tenure_10 | card2tenure_11 | card2tenure_12 | card2tenure_13 | card2tenure_14 | card2tenure_15 | card2tenure_16 | card2tenure_17 | card2tenure_18 | card2tenure_19 | card2tenure_20 | card2tenure_21 | card2tenure_22 | card2tenure_23 | card2tenure_24 | card2tenure_25 | card2tenure_26 | card2tenure_27 | card2tenure_28 | card2tenure_29 | card2tenure_30 | card2tenurecat_2 | card2tenurecat_3 | card2tenurecat_4 | card2tenurecat_5 | active_1 | bfast_2 | bfast_3 | churn_1 | tollfree_1 | equip_1 | callcard_1 | wireless_1 | multline_1 | voice_1 | pager_1 | internet_1 | internet_2 | internet_3 | internet_4 | callid_1 | callwait_1 | forward_1 | confer_1 | ebill_1 | owntv_1 | ownvcr_1 | owndvd_1 | owncd_1 | ownpda_1 | ownpc_1 | ownipod_1 | owngame_1 | ownfax_1 | news_1 | response_01_1 | response_02_1 | response_03_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1760 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
789 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 1 | 0 |
1830 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 |
2970 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
4839 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 |
Now that both the numerical and the categorical data frames are clean, we can merge the two data sets.
# Combine the numerical frame and the dummy-encoded categorical frame column-wise.
custdata_df_new = pd.concat([custdata_df_num, custdata_df_cat], axis=1)
# Preview the first rows of the merged frame.
custdata_df_new.head()
age | ed | income | lninc | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | spoused | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | carvalue | commutetime | tenure | longmon | lnlongmon | longten | lnlongten | tollmon | lntollmon | tollten | lntollten | equipmon | lnequipmon | equipten | lnequipten | cardmon | lncardmon | cardten | lncardten | wiremon | lnwiremon | wireten | lnwireten | hourstv | totalspend | region_2 | region_3 | region_4 | region_5 | townsize_2.0 | townsize_3.0 | townsize_4.0 | townsize_5.0 | gender_1 | agecat_3 | agecat_4 | agecat_5 | agecat_6 | birthmonth_August | birthmonth_December | birthmonth_February | birthmonth_January | birthmonth_July | birthmonth_June | birthmonth_March | birthmonth_May | birthmonth_November | birthmonth_October | birthmonth_September | edcat_2 | edcat_3 | edcat_4 | edcat_5 | jobcat_2 | jobcat_3 | jobcat_4 | jobcat_5 | jobcat_6 | union_1 | employ_1 | employ_2 | employ_3 | employ_4 | employ_5 | employ_6 | employ_7 | employ_8 | employ_9 | employ_10 | employ_11 | employ_12 | employ_13 | employ_14 | employ_15 | employ_16 | employ_17 | employ_18 | employ_19 | employ_20 | employ_21 | employ_22 | employ_23 | employ_24 | employ_25 | employ_26 | employ_27 | employ_28 | employ_29 | employ_30 | employ_31 | employ_32 | employ_33 | employ_34 | employ_35 | employ_36 | employ_37 | employ_38 | employ_39 | employ_40 | employ_41 | employ_42 | employ_43 | employ_44 | employ_45 | employ_46 | employ_47 | employ_48 | employ_49 | employ_51 | employ_52 | empcat_2 | empcat_3 | empcat_4 | empcat_5 | retire_1 | inccat_2 | inccat_3 | inccat_4 | inccat_5 | default_1 | jobsat_2 | jobsat_3 | jobsat_4 | jobsat_5 | marital_1 | spousedcat_1 | spousedcat_2 | spousedcat_3 | spousedcat_4 | spousedcat_5 | homeown_1 | hometype_2 | hometype_3 | hometype_4 | address_1 | address_2 | address_3 | address_4 | address_5 | address_6 | address_7 | address_8 | address_9 | address_10 | 
address_11 | address_12 | address_13 | address_14 | address_15 | address_16 | address_17 | address_18 | address_19 | address_20 | address_21 | address_22 | address_23 | address_24 | address_25 | address_26 | address_27 | address_28 | address_29 | address_30 | address_31 | address_32 | address_33 | address_34 | address_35 | address_36 | address_37 | address_38 | address_39 | address_40 | address_41 | address_42 | address_43 | address_44 | address_45 | address_46 | address_47 | address_48 | address_49 | address_50 | address_51 | address_52 | address_53 | address_54 | address_55 | address_57 | addresscat_2 | addresscat_3 | addresscat_4 | addresscat_5 | cars_1 | cars_2 | cars_3 | cars_4 | cars_5 | cars_6 | cars_7 | cars_8 | carown_0 | carown_1 | cartype_0 | cartype_1 | carcatvalue_1 | carcatvalue_2 | carcatvalue_3 | carbought_0 | carbought_1 | carbuy_1 | commute_2 | commute_3 | commute_4 | commute_5 | commute_6 | commute_7 | commute_8 | commute_9 | commute_10 | commutecat_2 | commutecat_3 | commutecat_4 | commutecat_5 | commutecar_1 | commutemotorcycle_1 | commutecarpool_1 | commutebus_1 | commuterail_1 | commutepublic_1 | commutebike_1 | commutewalk_1 | commutenonmotor_1 | telecommute_1 | reason_2 | reason_3 | reason_4 | reason_9 | polview_2 | polview_3 | polview_4 | polview_5 | polview_6 | polview_7 | polparty_1 | polcontrib_1 | vote_1 | card_2 | card_3 | card_4 | card_5 | cardtype_2 | cardtype_3 | cardtype_4 | cardbenefit_2 | cardbenefit_3 | cardbenefit_4 | cardfee_1 | cardtenure_1 | cardtenure_2 | cardtenure_3 | cardtenure_4 | cardtenure_5 | cardtenure_6 | cardtenure_7 | cardtenure_8 | cardtenure_9 | cardtenure_10 | cardtenure_11 | cardtenure_12 | cardtenure_13 | cardtenure_14 | cardtenure_15 | cardtenure_16 | cardtenure_17 | cardtenure_18 | cardtenure_19 | cardtenure_20 | cardtenure_21 | cardtenure_22 | cardtenure_23 | cardtenure_24 | cardtenure_25 | cardtenure_26 | cardtenure_27 | cardtenure_28 | cardtenure_29 | cardtenure_30 | cardtenure_31 | cardtenure_32 | 
cardtenure_33 | cardtenure_34 | cardtenure_35 | cardtenure_36 | cardtenure_37 | cardtenure_38 | cardtenure_39 | cardtenure_40 | cardtenurecat_2 | cardtenurecat_3 | cardtenurecat_4 | cardtenurecat_5 | card2_2 | card2_3 | card2_4 | card2_5 | card2type_2 | card2type_3 | card2type_4 | card2benefit_2 | card2benefit_3 | card2benefit_4 | card2fee_1 | card2tenure_1 | card2tenure_2 | card2tenure_3 | card2tenure_4 | card2tenure_5 | card2tenure_6 | card2tenure_7 | card2tenure_8 | card2tenure_9 | card2tenure_10 | card2tenure_11 | card2tenure_12 | card2tenure_13 | card2tenure_14 | card2tenure_15 | card2tenure_16 | card2tenure_17 | card2tenure_18 | card2tenure_19 | card2tenure_20 | card2tenure_21 | card2tenure_22 | card2tenure_23 | card2tenure_24 | card2tenure_25 | card2tenure_26 | card2tenure_27 | card2tenure_28 | card2tenure_29 | card2tenure_30 | card2tenurecat_2 | card2tenurecat_3 | card2tenurecat_4 | card2tenurecat_5 | active_1 | bfast_2 | bfast_3 | churn_1 | tollfree_1 | equip_1 | callcard_1 | wireless_1 | multline_1 | voice_1 | pager_1 | internet_1 | internet_2 | internet_3 | internet_4 | callid_1 | callwait_1 | forward_1 | confer_1 | ebill_1 | owntv_1 | ownvcr_1 | owndvd_1 | owncd_1 | ownpda_1 | ownpc_1 | ownipod_1 | owngame_1 | ownfax_1 | news_1 | response_01_1 | response_02_1 | response_03_1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20.0 | 15.0 | 31.0 | 3.433987 | 11.1 | 1.200909 | 0.183079 | 2.240091 | 0.806516 | -1.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 14.3 | 22.0 | 5.0 | 6.50 | 1.871802 | 34.40 | 3.538057 | 29.0 | 3.367296 | 161.05 | 5.081715 | 29.50 | 3.384390 | 126.1 | 4.837075 | 14.25 | 2.656757 | 60.0 | 4.094345 | 0.00 | 3.604226 | 0.00 | 6.808151 | 13.0 | 149.46 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 |
1 | 22.0 | 17.0 | 15.0 | 2.708050 | 18.6 | 1.222020 | 0.200505 | 1.567980 | 0.449788 | -1.0 | 2.0 | 6.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 | 6.8 | 29.0 | 39.0 | 8.90 | 2.186051 | 330.60 | 5.800909 | 0.0 | 3.242727 | 0.00 | 6.585937 | 54.85 | 4.004602 | 1975.0 | 7.588324 | 16.00 | 2.772589 | 610.0 | 6.413459 | 45.65 | 3.821004 | 1683.55 | 7.428660 | 18.0 | 77.54 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 |
2 | 67.0 | 14.0 | 35.0 | 3.555348 | 9.9 | 0.928620 | -0.074056 | 2.536380 | 0.930738 | 13.0 | 3.0 | 3.0 | 2.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 18.8 | 24.0 | 65.0 | 28.40 | 3.346389 | 1858.35 | 7.527444 | 0.0 | 3.242727 | 0.00 | 6.585937 | 0.00 | 3.599725 | 0.0 | 6.747846 | 23.00 | 3.135494 | 1410.0 | 7.251345 | 0.00 | 3.604226 | 0.00 | 6.808151 | 21.0 | 359.97 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
3 | 23.0 | 16.0 | 20.0 | 2.995732 | 5.7 | 0.033160 | -3.401690 | 1.117200 | 0.110826 | 18.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.7 | 38.0 | 36.0 | 6.00 | 1.791759 | 199.45 | 5.295564 | 0.0 | 3.242727 | 0.00 | 6.585937 | 0.00 | 3.599725 | 0.0 | 6.747846 | 21.00 | 3.044522 | 685.0 | 6.529419 | 0.00 | 3.604226 | 0.00 | 6.808151 | 26.0 | 359.41 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 |
4 | 26.0 | 16.0 | 23.0 | 3.135494 | 1.7 | 0.214659 | -1.538705 | 0.176341 | -1.735336 | 13.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10.6 | 32.0 | 21.0 | 3.05 | 1.115142 | 74.10 | 4.305416 | 16.5 | 2.803360 | 387.70 | 5.960232 | 0.00 | 3.599725 | 0.0 | 6.747846 | 17.25 | 2.847812 | 360.0 | 5.886104 | 19.05 | 2.947067 | 410.80 | 6.018106 | 27.0 | 507.83 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 0 | 1 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 |
# Some features are highly correlated with their log-transformed versions, so they are candidates for dropping (the drop below is currently commented out).
#custdata_df_new.drop(['cardten', 'lninc','lncreddebt','lnothdebt','lnlongmon','lnlongten','lntollmon','lntollten',
#'lnequipmon','lnequipten','lncardten','lnwiremon','lnwireten'],axis=1, inplace=True)
custdata_df_new.shape
(5000, 400)
# For linear regression, y (totalspend) should follow a normal distribution,
# so plot its distribution first.
import seaborn as sns
sns.distplot(custdata_df_new.totalspend)
<matplotlib.axes._subplots.AxesSubplot at 0xc5866a0>
As the plot shows, the distribution is right-skewed, so we need to apply a transformation (such as taking the log) and then plot the graph again.
from scipy import stats
import pylab
# Probability (Q-Q) plot of totalspend against a theoretical normal distribution.
stats.probplot(custdata_df_new.totalspend, dist="norm", plot=pylab )
pylab.show()
As the graph above shows, our data is not perfectly normally distributed, so we will use the Box-Cox technique to bring it closer to a normal distribution.
from scipy import stats

# Box-Cox transform the whole totalspend column; the fitted lambda is kept so
# that predictions can be mapped back to the original scale later on.
bc_values, fitted_lambda = stats.boxcox(custdata_df_new['totalspend'])
custdata_df_new['bc_total_spend'] = bc_values
sns.distplot(custdata_df_new['bc_total_spend'])
<matplotlib.axes._subplots.AxesSubplot at 0xc010390>
stats.probplot(custdata_df_new.bc_total_spend, dist="norm", plot=pylab )
pylab.show()
# Drop the raw target (totalspend); only the predictors and the Box-Cox
# transformed target should remain in the frame.
custdata_df_new.drop(['totalspend'], axis=1, inplace=True)

# Every column except the transformed target is a candidate predictor.
feature_columns = custdata_df_new.columns.difference(['bc_total_spend'])

from sklearn.model_selection import train_test_split

# 80/20 train/test split with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(
    custdata_df_new[feature_columns],
    custdata_df_new['bc_total_spend'],
    test_size=0.2,
    random_state=12,
)

# Sanity-check the size of each split.
for part in (train_x, test_x, train_y, test_y):
    print(len(part))
4000 1000 4000 1000
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
RandomForestRegressor?
# Tune the number of trees with a 2-fold grid search over 10..24.
# random_state is fixed so that the selected n_estimators is repeatable between
# runs. warm_start is left at its default: GridSearchCV clones the estimator
# for every candidate/fold, so warm_start=True had no effect here.
param_grid = {'n_estimators': np.arange(10, 25)}
tree = GridSearchCV(
    RandomForestRegressor(oob_score=False, random_state=12),
    param_grid,
    cv=2,
)
tree.fit(train_x, train_y)
GridSearchCV(cv=2, error_score='raise', estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1, oob_score=False, random_state=None, verbose=0, warm_start=True), fit_params=None, iid=True, n_jobs=1, param_grid={'n_estimators': array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24])}, pre_dispatch='2*n_jobs', refit=True, return_train_score='warn', scoring=None, verbose=0)
tree.best_params_
{'n_estimators': 23}
# Refit with the grid-search winner (n_estimators=23) and OOB scoring enabled.
# random_state is fixed so that the OOB score and the feature-importance
# ranking derived from this model are reproducible from run to run.
radm_clf = RandomForestRegressor(oob_score=True, n_estimators=23, random_state=12)
radm_clf.fit(train_x, train_y)
RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None, max_features='auto', max_leaf_nodes=None, min_impurity_decrease=0.0, min_impurity_split=None, min_samples_leaf=1, min_samples_split=2, min_weight_fraction_leaf=0.0, n_estimators=23, n_jobs=1, oob_score=True, random_state=None, verbose=0, warm_start=False)
radm_clf.oob_score_
0.15767032576065787
from sklearn import metrics

# Rank every predictor by its random-forest importance, most important first.
# The table is built in one shot instead of appending one row at a time.
indices = np.argsort(radm_clf.feature_importances_)[::-1]
feature_rank = pd.DataFrame(
    {
        'rank': np.arange(1, len(indices) + 1),
        'feature': train_x.columns.values[indices],
        'importance': radm_clf.feature_importances_[indices],
    },
    columns=['rank', 'feature', 'importance'],
)
sns.barplot(y='feature', x='importance', data=feature_rank)
<matplotlib.axes._subplots.AxesSubplot at 0xc09e4a8>
# Importance ranking of all predictors (rank 1 = most important), built with a
# single vectorised DataFrame construction rather than a per-row loop.
indices = np.argsort(radm_clf.feature_importances_)[::-1]
feature_rank = pd.DataFrame(
    {
        'rank': np.arange(1, len(indices) + 1),
        'feature': train_x.columns.values[indices],
        'importance': radm_clf.feature_importances_[indices],
    },
    columns=['rank', 'feature', 'importance'],
)
feature_rank
rank | feature | importance | |
---|---|---|---|
0 | 1 | income | 1.037454e-01 |
1 | 2 | lninc | 7.589780e-02 |
2 | 3 | card_3 | 2.596244e-02 |
3 | 4 | card_2 | 2.239525e-02 |
4 | 5 | carvalue | 2.154065e-02 |
5 | 6 | card_4 | 2.112358e-02 |
6 | 7 | reason_2 | 2.094437e-02 |
7 | 8 | debtinc | 1.930640e-02 |
8 | 9 | commutetime | 1.846777e-02 |
9 | 10 | hourstv | 1.661541e-02 |
10 | 11 | age | 1.652895e-02 |
11 | 12 | lncreddebt | 1.468481e-02 |
12 | 13 | creddebt | 1.400680e-02 |
13 | 14 | othdebt | 1.242060e-02 |
14 | 15 | ed | 1.229365e-02 |
15 | 16 | lncardmon | 1.194613e-02 |
16 | 17 | tenure | 1.115788e-02 |
17 | 18 | lnothdebt | 1.040137e-02 |
18 | 19 | pets | 1.001150e-02 |
19 | 20 | lncardten | 9.964655e-03 |
20 | 21 | cardmon | 9.230549e-03 |
21 | 22 | lntollmon | 9.129050e-03 |
22 | 23 | longmon | 9.106174e-03 |
23 | 24 | card_5 | 9.043518e-03 |
24 | 25 | spoused | 8.085497e-03 |
25 | 26 | cardten | 7.930730e-03 |
26 | 27 | lnlongten | 7.926722e-03 |
27 | 28 | lnequipten | 7.834682e-03 |
28 | 29 | lnlongmon | 7.799020e-03 |
29 | 30 | longten | 7.240294e-03 |
... | ... | ... | ... |
369 | 370 | employ_45 | 8.859233e-05 |
370 | 371 | address_45 | 7.747346e-05 |
371 | 372 | owntv_1 | 6.995432e-05 |
372 | 373 | reason_3 | 5.521577e-05 |
373 | 374 | address_53 | 5.338786e-05 |
374 | 375 | inccat_5 | 5.133915e-05 |
375 | 376 | employ_38 | 5.067010e-05 |
376 | 377 | address_33 | 4.766309e-05 |
377 | 378 | address_23 | 4.622708e-05 |
378 | 379 | employ_26 | 3.523784e-05 |
379 | 380 | cars_7 | 3.163522e-05 |
380 | 381 | address_46 | 3.095265e-05 |
381 | 382 | address_41 | 1.586377e-05 |
382 | 383 | employ_36 | 1.417776e-05 |
383 | 384 | address_43 | 4.679721e-06 |
384 | 385 | address_49 | 2.893917e-06 |
385 | 386 | address_50 | 2.708603e-06 |
386 | 387 | employ_42 | 7.787210e-07 |
387 | 388 | employ_47 | 1.211027e-07 |
388 | 389 | address_55 | 1.107699e-07 |
389 | 390 | address_52 | 4.552084e-08 |
390 | 391 | address_54 | 9.325467e-09 |
391 | 392 | employ_52 | 0.000000e+00 |
392 | 393 | address_57 | 0.000000e+00 |
393 | 394 | employ_51 | 0.000000e+00 |
394 | 395 | employ_41 | 0.000000e+00 |
395 | 396 | employ_49 | 0.000000e+00 |
396 | 397 | employ_48 | 0.000000e+00 |
397 | 398 | employ_46 | 0.000000e+00 |
398 | 399 | cars_8 | 0.000000e+00 |
399 rows × 3 columns
# Take the top-ranked feature names as a plain Python list.
# .loc slices by label and includes both ends, so 0:75 covers rows 0 through 75.
x = feature_rank.loc[0:75, 'feature'].tolist()
print(x)
['income', 'lninc', 'card_3', 'card_2', 'carvalue', 'card_4', 'reason_2', 'debtinc', 'commutetime', 'hourstv', 'age', 'lncreddebt', 'creddebt', 'othdebt', 'ed', 'lncardmon', 'tenure', 'lnothdebt', 'pets', 'lncardten', 'cardmon', 'lntollmon', 'longmon', 'card_5', 'spoused', 'cardten', 'lnlongten', 'lnequipten', 'lnlongmon', 'longten', 'card2_2', 'lntollten', 'lnequipmon', 'tollmon', 'card2_3', 'lnwiremon', 'gender_1', 'reside', 'tollten', 'pets_cats', 'pets_dogs', 'pets_freshfish', 'equipmon', 'commutewalk_1', 'polparty_1', 'lnwireten', 'polview_4', 'card2benefit_3', 'union_1', 'spousedcat_2', 'card2benefit_2', 'card2type_2', 'vote_1', 'carown_0', 'equipten', 'region_3', 'card2_4', 'carbought_1', 'carbuy_1', 'jobsat_4', 'birthmonth_October', 'active_1', 'townsize_3.0', 'cardbenefit_4', 'region_5', 'cartype_0', 'cardtype_2', 'cardtype_4', 'forward_1', 'commutebus_1', 'jobsat_3', 'commutecar_1', 'card2benefit_4', 'townsize_4.0', 'commuterail_1', 'wiremon']
# Create data frame with selected features
rf_features=['income', 'lninc', 'card_3', 'card_2', 'carvalue', 'card_4', 'commutetime', 'reason_2', 'debtinc', 'hourstv', 'creddebt', 'age', 'lncreddebt', 'tenure', 'lncardmon', 'lnothdebt', 'ed', 'othdebt', 'pets', 'lncardten', 'lntollmon', 'cardmon', 'longmon', 'card_5', 'lnlongmon', 'reside', 'spoused', 'cardten', 'lntollten', 'lnequipmon', 'lnlongten', 'tollten', 'longten', 'tollmon', 'lnwiremon', 'card2_2', 'card2_3', 'pets_freshfish', 'lnequipten', 'gender_1', 'pets_dogs', 'equipten', 'wireten', 'pets_cats', 'address_22', 'equipmon', 'wiremon', 'jobsat_4', 'commutebus_1', 'card2benefit_4']
# Add the target so it is carried along with the selected predictors.
rf_features.append('bc_total_spend')
# Take an explicit copy: df_rf is modified in place further down, and mutating
# a column subset of custdata_df_new triggers pandas' SettingWithCopy checks.
df_rf = custdata_df_new[rf_features].copy()
df_rf.head(5)
income | lninc | card_3 | card_2 | carvalue | card_4 | commutetime | reason_2 | debtinc | hourstv | creddebt | age | lncreddebt | tenure | lncardmon | lnothdebt | ed | othdebt | pets | lncardten | lntollmon | cardmon | longmon | card_5 | lnlongmon | reside | spoused | cardten | lntollten | lnequipmon | lnlongten | tollten | longten | tollmon | lnwiremon | card2_2 | card2_3 | pets_freshfish | lnequipten | gender_1 | pets_dogs | equipten | wireten | pets_cats | address_22 | equipmon | wiremon | jobsat_4 | commutebus_1 | card2benefit_4 | bc_total_spend | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 31.0 | 3.433987 | 1 | 0 | 14.3 | 0 | 22.0 | 0 | 11.1 | 13.0 | 1.200909 | 20.0 | 0.183079 | 5.0 | 2.656757 | 0.806516 | 15.0 | 2.240091 | 0.0 | 4.094345 | 3.367296 | 14.25 | 6.50 | 0 | 1.871802 | 3.0 | -1.0 | 60.0 | 5.081715 | 3.384390 | 3.538057 | 161.05 | 34.40 | 29.0 | 3.604226 | 0 | 0 | 0.0 | 4.837075 | 1 | 0.0 | 126.1 | 0.00 | 0.0 | 0 | 29.50 | 0.00 | 0 | 0 | 0 | 7.323447 |
1 | 15.0 | 2.708050 | 0 | 1 | 6.8 | 0 | 29.0 | 0 | 18.6 | 18.0 | 1.222020 | 22.0 | 0.200505 | 39.0 | 2.772589 | 0.449788 | 17.0 | 1.567980 | 6.0 | 6.413459 | 3.242727 | 16.00 | 8.90 | 0 | 2.186051 | 2.0 | -1.0 | 610.0 | 6.585937 | 4.004602 | 5.800909 | 0.00 | 330.60 | 0.0 | 3.821004 | 0 | 0 | 6.0 | 7.588324 | 0 | 0.0 | 1975.0 | 1683.55 | 0.0 | 0 | 54.85 | 45.65 | 0 | 1 | 0 | 6.039640 |
2 | 35.0 | 3.555348 | 0 | 1 | 18.8 | 0 | 24.0 | 1 | 9.9 | 21.0 | 0.928620 | 67.0 | -0.074056 | 65.0 | 3.135494 | 0.930738 | 14.0 | 2.536380 | 3.0 | 7.251345 | 3.242727 | 23.00 | 28.40 | 0 | 3.346389 | 3.0 | 13.0 | 1410.0 | 6.585937 | 3.599725 | 7.527444 | 0.00 | 1858.35 | 0.0 | 3.604226 | 0 | 0 | 0.0 | 6.747846 | 1 | 1.0 | 0.0 | 0.00 | 2.0 | 0 | 0.00 | 0.00 | 1 | 1 | 0 | 9.243615 |
3 | 20.0 | 2.995732 | 0 | 1 | 8.7 | 0 | 38.0 | 0 | 5.7 | 26.0 | 0.033160 | 23.0 | -3.401690 | 36.0 | 3.044522 | 0.110826 | 16.0 | 1.117200 | 0.0 | 6.529419 | 3.242727 | 21.00 | 6.00 | 0 | 1.791759 | 5.0 | 18.0 | 685.0 | 6.585937 | 3.599725 | 5.295564 | 0.00 | 199.45 | 0.0 | 3.604226 | 0 | 1 | 0.0 | 6.747846 | 0 | 0.0 | 0.0 | 0.00 | 0.0 | 0 | 0.00 | 0.00 | 0 | 0 | 1 | 9.239995 |
4 | 23.0 | 3.135494 | 0 | 0 | 10.6 | 1 | 32.0 | 0 | 1.7 | 27.0 | 0.214659 | 26.0 | -1.538705 | 21.0 | 2.847812 | -1.735336 | 16.0 | 0.176341 | 0.0 | 5.886104 | 2.803360 | 17.25 | 3.05 | 0 | 1.115142 | 4.0 | 13.0 | 360.0 | 5.960232 | 3.599725 | 4.305416 | 387.70 | 74.10 | 16.5 | 2.947067 | 0 | 0 | 0.0 | 6.747846 | 0 | 0.0 | 0.0 | 410.80 | 0.0 | 0 | 0.00 | 19.05 | 0 | 0 | 0 | 10.063779 |
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
from patsy import dmatrices

# rf_features had the target appended in order to build df_rf; take it back off
# so the list holds predictors only. The guard keeps a re-run of this cell from
# deleting a genuine predictor once the target has already been removed.
if rf_features and rf_features[-1] == 'bc_total_spend':
    del rf_features[-1]
%%capture
#gather features
features = "+".join(rf_features)
features
'income+lninc+card_3+card_2+carvalue+card_4+commutetime+reason_2+debtinc+hourstv+creddebt+age+lncreddebt+tenure+lncardmon+lnothdebt+ed+othdebt+pets+lncardten+lntollmon+cardmon+longmon+card_5+lnlongmon+reside+spoused+cardten+lntollten+lnequipmon+lnlongten+tollten+longten+tollmon+lnwiremon+card2_2+card2_3+pets_freshfish+lnequipten+gender_1+pets_dogs+equipten+wireten+pets_cats+address_22+equipmon+wiremon+jobsat_4+commutebus_1+card2benefit_4'
# Build the response and design matrices for the regression formula
# (the design matrix X includes an Intercept column).
y, X = dmatrices('bc_total_spend~' + features, df_rf, return_type='dataframe')

# Variance inflation factor of every design-matrix column, shown from the most
# to the least collinear.
vif = pd.DataFrame(
    {
        "VIF Factor": [
            variance_inflation_factor(X.values, col) for col in range(X.shape[1])
        ],
        "features": X.columns,
    },
    columns=["VIF Factor", "features"],
)
vif.sort_values(by=['VIF Factor'], ascending=False)
VIF Factor | features | |
---|---|---|
0 | 1686.859676 | Intercept |
23 | 148.928795 | longmon |
33 | 117.386519 | longten |
25 | 33.762250 | lnlongmon |
31 | 31.415359 | lnlongten |
14 | 24.098226 | tenure |
32 | 21.097455 | tollten |
42 | 19.936232 | equipten |
34 | 15.002808 | tollmon |
46 | 14.944802 | equipmon |
28 | 14.474511 | cardten |
19 | 13.324262 | pets |
47 | 13.134309 | wiremon |
43 | 12.799070 | wireten |
38 | 11.869724 | pets_freshfish |
22 | 10.962786 | cardmon |
2 | 9.948275 | lninc |
1 | 9.725833 | income |
29 | 8.130066 | lntollten |
20 | 7.905704 | lncardten |
16 | 7.517424 | lnothdebt |
39 | 6.498882 | lnequipten |
9 | 6.486941 | debtinc |
15 | 5.797448 | lncardmon |
18 | 5.167748 | othdebt |
13 | 4.276376 | lncreddebt |
11 | 3.947735 | creddebt |
5 | 3.534663 | carvalue |
21 | 3.065122 | lntollmon |
30 | 2.381248 | lnequipmon |
26 | 2.006284 | reside |
12 | 1.972903 | age |
4 | 1.882678 | card_2 |
3 | 1.856482 | card_3 |
27 | 1.833635 | spoused |
44 | 1.833349 | pets_cats |
6 | 1.788595 | card_4 |
41 | 1.742336 | pets_dogs |
35 | 1.599433 | lnwiremon |
17 | 1.465111 | ed |
37 | 1.311558 | card2_3 |
36 | 1.294873 | card2_2 |
24 | 1.257129 | card_5 |
48 | 1.039194 | jobsat_4 |
10 | 1.037751 | hourstv |
8 | 1.022514 | reason_2 |
40 | 1.021022 | gender_1 |
45 | 1.019974 | address_22 |
7 | 1.015482 | commutetime |
49 | 1.013662 | commutebus_1 |
50 | 1.006757 | card2benefit_4 |
# Select only those features whose VIF is greater than 10.
# The Intercept row is excluded by name rather than by position, so a genuine
# feature is never dropped from the list if the row order changes or the
# Intercept's VIF happens to fall below the threshold.
high_vif = (vif['VIF Factor'] > 10) & (vif['features'] != 'Intercept')
vif1 = vif.loc[high_vif, ['features']]
drop_vars = vif1["features"].tolist()
drop_vars
['tenure', 'pets', 'cardmon', 'longmon', 'lnlongmon', 'cardten', 'lnlongten', 'tollten', 'longten', 'tollmon', 'pets_freshfish', 'equipten', 'wireten', 'equipmon', 'wiremon']
# Drop the variables that have a VIF greater than 10.
# Rebind to the returned frame rather than dropping in place, so that no
# existing DataFrame object is mutated.
df_rf = df_rf.drop(drop_vars, axis=1)
df_rf.shape
(5000, 36)
sns.lmplot(x="income",y="bc_total_spend",data=df_rf,aspect=5,scatter_kws={'alpha':0.5})
<seaborn.axisgrid.FacetGrid at 0xb319320>
# Columns left out of the model: the target itself plus the predictors that
# were dropped one at a time because their p-values were greater than 5%.
non_predictors = [
    'bc_total_spend', 'address_22', 'pets_cats', 'lncardmon', 'lnequipten',
    'spoused', 'creddebt', 'reside', 'othdebt', 'debtinc', 'lnothdebt',
    'jobsat_4', 'income', 'carvalue', 'card2benefit_4', 'commutebus_1',
    'hourstv', 'lnwiremon', 'pets_dogs', 'lncardten', 'commutetime',
    'lntollten', 'lntollmon', 'lnequipmon',
]
feature_columns = df_rf.columns.difference(non_predictors)

# 80/20 split on the remaining predictors with a fixed seed.
train_x, test_x, train_y, test_y = train_test_split(
    df_rf[feature_columns],
    df_rf['bc_total_spend'],
    test_size=.2,
    random_state=123,
)

import statsmodels.api as sm

# Add the constant (intercept) column to both splits, then fit OLS on train.
train_x = sm.add_constant(train_x)
test_x = sm.add_constant(test_x)
lm = sm.OLS(train_y, train_x).fit()
print(lm.summary())
OLS Regression Results ============================================================================== Dep. Variable: bc_total_spend R-squared: 0.325 Model: OLS Adj. R-squared: 0.323 Method: Least Squares F-statistic: 159.7 Date: Tue, 01 Oct 2019 Prob (F-statistic): 0.00 Time: 22:46:22 Log-Likelihood: -6598.7 No. Observations: 4000 AIC: 1.322e+04 Df Residuals: 3987 BIC: 1.331e+04 Df Model: 12 Covariance Type: nonrobust ============================================================================== coef std err t P>|t| [0.025 0.975] ------------------------------------------------------------------------------ const 8.2401 0.172 47.920 0.000 7.903 8.577 age -0.0042 0.001 -3.681 0.000 -0.006 -0.002 card2_2 -0.3794 0.052 -7.349 0.000 -0.481 -0.278 card2_3 -0.3452 0.051 -6.788 0.000 -0.445 -0.246 card_2 -1.2433 0.064 -19.525 0.000 -1.368 -1.118 card_3 -1.2391 0.064 -19.465 0.000 -1.364 -1.114 card_4 -1.2693 0.060 -21.022 0.000 -1.388 -1.151 card_5 -0.9928 0.108 -9.214 0.000 -1.204 -0.782 ed -0.0154 0.007 -2.349 0.019 -0.028 -0.003 gender_1 -0.1489 0.040 -3.713 0.000 -0.227 -0.070 lncreddebt 0.0396 0.020 1.997 0.046 0.001 0.079 lninc 0.8090 0.034 23.600 0.000 0.742 0.876 reason_2 0.7071 0.081 8.746 0.000 0.549 0.866 ============================================================================== Omnibus: 4.553 Durbin-Watson: 1.976 Prob(Omnibus): 0.103 Jarque-Bera (JB): 4.573 Skew: -0.082 Prob(JB): 0.102 Kurtosis: 2.983 Cond. No. 473. ============================================================================== Warnings: [1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
lm.pvalues.sort_values(ascending=False)
lncreddebt 4.587652e-02 ed 1.887734e-02 age 2.357317e-04 gender_1 2.078595e-04 card2_3 1.303811e-11 card2_2 2.408523e-13 reason_2 3.210677e-18 card_5 4.947423e-20 card_3 1.090371e-80 card_2 3.707595e-81 card_4 3.890897e-93 lninc 2.235272e-115 const 0.000000e+00 dtype: float64
print('Parameters:', lm.params) # Find the parameters of x i.e Beta value
print('R2: ', lm.rsquared) # Find the r**2
Parameters: const 8.240115 age -0.004194 card2_2 -0.379415 card2_3 -0.345223 card_2 -1.243274 card_3 -1.239082 card_4 -1.269314 card_5 -0.992801 ed -0.015444 gender_1 -0.148872 lncreddebt 0.039624 lninc 0.809044 reason_2 0.707096 dtype: float64 R2: 0.3246950341215388
from sklearn import metrics

# Predict on both splits and compare their mean squared error
# (on the Box-Cox transformed scale).
train_pred = lm.predict(train_x)
test_pred = lm.predict(test_x)
print('MSE Test:', metrics.mean_squared_error(test_y, test_pred))
print('MSE Train:', metrics.mean_squared_error(train_y, train_pred))
MSE Test: 1.5275403610908898 MSE Train: 1.5864409028377782
# Hold-out error metrics on the Box-Cox scale; RMSE is the square root of MSE.
test_mae = metrics.mean_absolute_error(test_y, test_pred)
test_mse = metrics.mean_squared_error(test_y, test_pred)
print('MAE:', test_mae)
print('MSE:', test_mse)
print('RMSE:', np.sqrt(test_mse))
MAE: 0.9762061585444958 MSE: 1.5275403610908898 RMSE: 1.2359370376725871
# Mean absolute percentage error for both splits (Box-Cox scale).
# The absolute value is taken over the whole ratio, so a negative target value
# cannot flip the sign of its term and cancel out other errors.
MAPE_train = '%.3f' % np.mean(np.abs((train_y - train_pred) / train_y))
MAPE_test = '%.3f' % np.mean(np.abs((test_y - test_pred) / test_y))
# print the values of MAPE for train and test
print('MAPE of training data: ', MAPE_train, ' | ', 'MAPE of testing data: ', MAPE_test)
MAPE of training data: 0.110 | MAPE of testing data: 0.106
We will use a Q-Q plot to examine whether the residuals are normally distributed.
residuals=train_y-train_pred
import seaborn as sns
sns.distplot(residuals)
<matplotlib.axes._subplots.AxesSubplot at 0xf889278>
from scipy import stats
import pylab
stats.probplot(residuals,dist='norm',plot=pylab)
pylab.show()
The histogram and Q-Q plot above show that the residuals are approximately normally distributed, so our normality assumption is not violated.
We are now going to predict credit card spend on the testing data.
# Keep only the columns used by the fitted model and add the constant column,
# giving the design matrix that lm.predict expects.
df_rf = sm.add_constant(df_rf[feature_columns])
df_rf.head()
const | age | card2_2 | card2_3 | card_2 | card_3 | card_4 | card_5 | ed | gender_1 | lncreddebt | lninc | reason_2 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 20.0 | 0 | 0 | 0 | 1 | 0 | 0 | 15.0 | 1 | 0.183079 | 3.433987 | 0 |
1 | 1.0 | 22.0 | 0 | 0 | 1 | 0 | 0 | 0 | 17.0 | 0 | 0.200505 | 2.708050 | 0 |
2 | 1.0 | 67.0 | 0 | 0 | 1 | 0 | 0 | 0 | 14.0 | 1 | -0.074056 | 3.555348 | 1 |
3 | 1.0 | 23.0 | 0 | 1 | 1 | 0 | 0 | 0 | 16.0 | 0 | -3.401690 | 2.995732 | 0 |
4 | 1.0 | 26.0 | 0 | 0 | 0 | 0 | 1 | 0 | 16.0 | 0 | -1.538705 | 3.135494 | 0 |
# Final prediction on test file
from scipy.special import boxcox, inv_boxcox

# Predict on the Box-Cox scale, then invert the transform with the lambda that
# was fitted on totalspend. The predictions keep df_rf's row index so that the
# concat below matches rows by label; rebuilding the frame from a plain list
# would silently assume both frames share an identical default index.
bc_pred = lm.predict(df_rf)
pred_total_spend = pd.DataFrame(
    {'pred_total_spend': inv_boxcox(np.asarray(bc_pred), fitted_lambda)},
    index=df_rf.index,
)
#Concatenating final prediction with original test file
testfile = pd.concat([custdata_df, pred_total_spend], axis=1)
testfile.head()
region | townsize | gender | age | agecat | birthmonth | ed | edcat | jobcat | union | employ | empcat | retire | income | lninc | inccat | debtinc | creddebt | lncreddebt | othdebt | lnothdebt | default | jobsat | marital | spoused | spousedcat | reside | pets | pets_cats | pets_dogs | pets_birds | pets_reptiles | pets_small | pets_saltfish | pets_freshfish | homeown | hometype | address | addresscat | cars | carown | cartype | carvalue | carcatvalue | carbought | carbuy | commute | commutecat | commutetime | commutecar | commutemotorcycle | commutecarpool | commutebus | commuterail | commutepublic | commutebike | commutewalk | commutenonmotor | telecommute | reason | polview | polparty | polcontrib | vote | card | cardtype | cardbenefit | cardfee | cardtenure | cardtenurecat | card2 | card2type | card2benefit | card2fee | card2tenure | card2tenurecat | active | bfast | tenure | churn | longmon | lnlongmon | longten | lnlongten | tollfree | tollmon | lntollmon | tollten | lntollten | equip | equipmon | lnequipmon | equipten | lnequipten | callcard | cardmon | lncardmon | cardten | lncardten | wireless | wiremon | lnwiremon | wireten | lnwireten | multline | voice | pager | internet | callid | callwait | forward | confer | ebill | owntv | hourstv | ownvcr | owndvd | owncd | ownpda | ownpc | ownipod | owngame | ownfax | news | response_01 | response_02 | response_03 | totalspend | pred_total_spend | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 2 | 1 | 20 | 2 | September | 15 | 3 | 1 | 1 | 0 | 1 | 0 | 31 | 3.433987 | 2 | 11.1 | 1.200909 | 0.183079 | 2.240091 | 0.806516 | 1 | 1 | 0 | -1 | -1 | 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | 2 | 1 | 0 | 14.3 | 1 | 0 | 0 | 8 | 4 | 22.0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 9 | 6 | 1 | 0 | 1 | 3 | 1 | 1 | 0 | 2 | 2 | 5 | 3 | 1 | 0 | 3 | 2 | 0 | 3 | 5 | 1 | 6.50 | 1.871802 | 34.40 | 3.538057 | 1 | 29.0 | 3.367296 | 161.05 | 5.081715 | 1 | 29.50 | 3.384390 | 126.1 | 4.837075 | 1 | 14.25 | 2.656757 | 60.0 | 4.094345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 1 | 1 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 13 | 1 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 149.46 | 372.303627 |
1 | 5 | 5 | 0 | 22 | 2 | May | 17 | 4 | 2 | 0 | 0 | 1 | 0 | 15 | 2.708050 | 1 | 18.6 | 1.222020 | 0.200505 | 1.567980 | 0.449788 | 1 | 1 | 0 | -1 | -1 | 2 | 6 | 0 | 0 | 0 | 0 | 0 | 0 | 6 | 1 | 3 | 2 | 1 | 2 | 1 | 1 | 6.8 | 1 | 0 | 0 | 1 | 1 | 29.0 | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 1 | 1 | 9 | 4 | 1 | 0 | 0 | 2 | 4 | 1 | 0 | 4 | 2 | 4 | 1 | 3 | 0 | 4 | 2 | 1 | 1 | 39 | 0 | 8.90 | 2.186051 | 330.60 | 5.800909 | 0 | 0.0 | NaN | 0.00 | NaN | 1 | 54.85 | 4.004602 | 1975.0 | 7.588324 | 1 | 16.00 | 2.772589 | 610.0 | 6.413459 | 1 | 45.65 | 3.821004 | 1683.55 | 7.428660 | 1 | 1 | 1 | 4 | 1 | 0 | 1 | 0 | 1 | 1 | 18 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 1 | 0 | 0 | 0 | 77.54 | 302.061632 |
2 | 3 | 4 | 1 | 67 | 6 | June | 14 | 2 | 2 | 0 | 16 | 5 | 0 | 35 | 3.555348 | 2 | 9.9 | 0.928620 | -0.074056 | 2.536380 | 0.930738 | 0 | 4 | 1 | 13 | 2 | 3 | 3 | 2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 30 | 5 | 3 | 1 | 1 | 18.8 | 1 | 0 | 1 | 4 | 3 | 24.0 | 1 | 0 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 2 | 5 | 1 | 0 | 0 | 2 | 1 | 4 | 0 | 35 | 5 | 4 | 1 | 3 | 0 | 25 | 5 | 0 | 3 | 65 | 0 | 28.40 | 3.346389 | 1858.35 | 7.527444 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 23.00 | 3.135494 | 1410.0 | 7.251345 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 21 | 1 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 359.97 | 480.932947 |
3 | 4 | 3 | 0 | 23 | 2 | May | 16 | 3 | 2 | 0 | 0 | 1 | 0 | 20 | 2.995732 | 1 | 5.7 | 0.022800 | -3.780995 | 1.117200 | 0.110826 | 1 | 2 | 1 | 18 | 4 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 3 | 2 | 3 | 1 | 1 | 8.7 | 1 | 0 | 1 | 1 | 1 | 38.0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 9 | 3 | 0 | 0 | 0 | 2 | 1 | 4 | 0 | 5 | 2 | 3 | 2 | 4 | 0 | 5 | 2 | 1 | 1 | 36 | 0 | 6.00 | 1.791759 | 199.45 | 5.295564 | 0 | 0.0 | NaN | 0.00 | NaN | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 21.00 | 3.044522 | 685.0 | 6.529419 | 0 | 0.00 | NaN | 0.00 | NaN | 1 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 1 | 1 | 26 | 1 | 1 | 1 | 0 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | 0 | 359.41 | 271.020145 |
4 | 2 | 2 | 0 | 26 | 3 | July | 16 | 3 | 2 | 0 | 1 | 1 | 0 | 23 | 3.135494 | 1 | 1.7 | 0.214659 | -1.538705 | 0.176341 | -1.735336 | 0 | 1 | 1 | 13 | 2 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 3 | 2 | 1 | 0 | 1 | 10.6 | 1 | 0 | 1 | 6 | 3 | 32.0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 9 | 4 | 0 | 0 | 0 | 4 | 2 | 1 | 0 | 8 | 3 | 1 | 3 | 2 | 0 | 9 | 3 | 1 | 3 | 21 | 0 | 3.05 | 1.115142 | 74.10 | 4.305416 | 1 | 16.5 | 2.803360 | 387.70 | 5.960232 | 0 | 0.00 | NaN | 0.0 | NaN | 1 | 17.25 | 2.847812 | 360.0 | 5.886104 | 1 | 19.05 | 2.947067 | 410.80 | 6.018106 | 0 | 1 | 0 | 3 | 1 | 1 | 1 | 1 | 0 | 1 | 27 | 1 | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 507.83 | 336.914886 |
#Export the final result in csv.
%cd C:\\Users\\ashwini\\Desktop
testfile.to_csv('Final_submission_credit_spend.csv',index=False)
C:\Users\ashwini\Desktop
##################End######################